library(data.table)
library(DBI)
library(ggplot2)
library(cowplot)
library(grid)
library(lme4)
library(lmerTest)
Sys.setlocale("LC_TIME", "en_US.UTF-8") # Print English date format
[1] "en_US.UTF-8"
en_US.UTF-8
# Sys.setlocale("LC_TIME", "nl_NL.UTF-8") # Print Dutch date format
number_format <- scales::number_format(big.mark = ",", decimal.mark = ".") # Print English number format
# number_format <- scales::number_format(big.mark = ".", decimal.mark = ",") # Print Dutch number format
theme_paper <- theme_classic(base_size = 12) +
theme(axis.text = element_text(colour = "black"),
panel.grid.major.y = element_line(colour = "grey92"))
School closure and opening dates
Sources:
date_schools_closed <- as.POSIXct("2020-03-16")
date_schools_opened <- as.POSIXct("2020-06-02")
Handle database connections
db_connect <- function() {
db <- dbConnect(RSQLite::SQLite(), file.path("..", "data", "noordhoff.sqlite"))
return(db)
}
db_disconnect <- function(db) {
dbDisconnect(db)
}
The database contains all SlimStampen data collected via Noordhoff’s platform in three courses: Stepping Stones (English), Grandes Lignes (French), and Neue Kontakte (German).
Trial-level response data are stored in the responses table. Book information, such as the course year, book title, and chapter, are stored in the book_info table.
responses| Column | Type | Explanation |
|---|---|---|
| date | int | UNIX time stamp [s] |
| user_id | chr | unique user identifier |
| method | chr | course |
| start_time | int | elapsed time since session start [ms] |
| rt | int | response time [ms] |
| duration | int | trial duration [ms] |
| fact_id | int | unique fact identifier (within chapter) |
| correct | int | response accuracy |
| answer | chr | user’s response |
| choices | int | number of answer choices (1 == open response) |
| backspace_used | dbl | user pressed backspace during trial |
| backspace_used_first | dbl | user erased first character of response |
| study | int | trial was a study trial |
| answer_language | chr | language of the answer |
| subsession | int | identifies part within learning session |
| book_info_id | chr | unique identifier of book information |
book_info| Column | Type | Explanation |
|---|---|---|
| book_info_id | chr | unique identifier of book information |
| method_group | chr | year and edition |
| book_title | chr | book title (incl. year, level, edition) |
| book_type | chr | type of book |
| chapter | chr | chapter number and title |
Preview first 10 rows
db <- db_connect()
responses_top <- dbGetQuery(db, "SELECT * FROM responses_noduplicates LIMIT 10")
responses_top
book_info_top <- dbGetQuery(db, "SELECT * FROM book_info LIMIT 10")
book_info_top
db_disconnect(db)
There are several measures of learning performance we can look at. The most straight-forward of these are response accuracy and response time.
Important factors to keep in mind: question type (multiple choice or open answer) and language. Note that we cannot distinguish between NL-X and X-X, since we only know the language of the answer.
db <- db_connect()
correct <- dbGetQuery(db,
"SELECT r.method AS 'method',
DATE(r.date + 3600, 'unixepoch') AS 'doy',
r.user_id AS 'user',
r.choices > 1 AS 'mcq',
r.correct AS 'correct',
COUNT(*) AS 'n'
FROM 'responses_noduplicates' r
WHERE r.study == 0
GROUP BY r.method,
DATE(r.date + 3600, 'unixepoch'),
r.user_id,
r.choices > 1,
r.correct"
)
setDT(correct)
db_disconnect(db)
Fill in missing rows (where all trials on a day were correct/incorrect):
correct <- tidyr::complete(correct, tidyr::nesting(method, doy, user, mcq), correct, fill = list(n = 0))
setDT(correct)
correct[, mcq := as.logical(mcq)]
accuracy <- correct[, .(accuracy = n[correct == 1]/sum(n), n = sum(n)), by = .(method, doy, user, mcq)]
Add a school year column (cutoff date: 1 August):
accuracy[, doy_posix := as.POSIXct(doy)]
accuracy[, school_year := ifelse(doy_posix < "2019-08-01", "18/19", "19/20")]
Add sensible course names:
accuracy[, course := ifelse(method == "Grandes Lignes", "French", ifelse(method == "Stepping Stones", "English", "German"))]
Align school years:
accuracy[school_year == "18/19", doy_posix_aligned := as.POSIXct(doy_posix + 365*24*60*60, origin = "1970-01-01")]
accuracy[school_year == "19/20", doy_posix_aligned := doy_posix]
Use cut.Date() to bin dates by week. Each day is assigned the date of the most recent Monday.
accuracy[, doy_posix_week := cut.POSIXt(doy_posix, "week")]
accuracy[, doy_posix_aligned_week := cut.POSIXt(doy_posix_aligned, "week")]
accuracy_by_week_and_user <- accuracy[, .(accuracy = sum(accuracy*n)/sum(n)), by = .(course, school_year, doy_posix_aligned_week, user, mcq)]
accuracy_by_week <- accuracy_by_week_and_user[, .(accuracy_mean = mean(accuracy, na.rm = TRUE),
accuracy_se = sd(accuracy, na.rm = TRUE)/sqrt(.N), n = .N), by = .(course, school_year, doy_posix_aligned_week, mcq)]
Add question type labels:
accuracy_by_week[, question_type := ifelse(mcq == TRUE, "Multiple\nchoice", "Open\nanswer")]
Plot response accuracy by week (mean +/- 1 SE).
p_acc <- ggplot(accuracy_by_week[(course == "English" & mcq == TRUE) | course == "French",],
aes(x = as.POSIXct(doy_posix_aligned_week), y = accuracy_mean, group = interaction(school_year, question_type), colour = school_year, fill = school_year)) +
facet_grid(. ~ course) +
geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = 0, ymax = 1.05, fill = "grey92", colour = "grey50", lty = 2, alpha = .9) +
geom_ribbon(aes(ymin = accuracy_mean - accuracy_se, ymax = accuracy_mean + accuracy_se, colour = NULL), alpha = 0.2) +
geom_line(aes(lty = question_type)) +
scale_x_datetime(expand = c(0, 0),
breaks = as.POSIXct(c(
"2019-10-01 02:00:00 CET",
"2019-12-01 02:00:00 CET",
"2020-02-01 02:00:00 CET",
"2020-04-01 02:00:00 CET",
"2020-06-01 02:00:00 CET")),
limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
date_labels = "%b") +
scale_y_continuous(limits = c(.7, 1), labels = scales::percent_format(accuracy = 1)) +
scale_colour_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
scale_fill_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
labs(x = NULL,
y = "Accuracy",
colour = "School year",
fill = "School year",
lty = "Question type") +
guides(colour = guide_legend(order = 1),
fill = guide_legend(order = 1),
lty = guide_legend(order = 2)) +
theme_paper
p_acc
Warning: Removed 24 row(s) containing missing values (geom_path).
ggsave("../output/acc_by_question_type.pdf", width = 9, height = 3)
Warning: Removed 24 row(s) containing missing values (geom_path).
ggsave("../output/acc_by_question_type.eps", width = 9, height = 3)
Warning: Removed 24 row(s) containing missing values (geom_path).
Warning in grid.Call.graphics(C_rect, x$x, x$y, x$width, x$height,
resolveHJust(x$just, : semi-transparency is not supported on this device:
reported only once per page
ggsave("../output/acc_by_question_type.png", width = 9, height = 3)
Warning: Removed 24 row(s) containing missing values (geom_path).
db <- db_connect()
correct_strat <- dbGetQuery(db,
"SELECT r.method AS 'method',
r.book_info_id AS 'book_info_id',
DATE(r.date + 3600, 'unixepoch') AS 'doy',
r.user_id AS 'user',
r.choices > 1 AS 'mcq',
r.correct AS 'correct',
COUNT(*) AS 'n'
FROM 'responses_noduplicates' r
WHERE r.study == 0
GROUP BY r.method,
r.book_info_id,
DATE(r.date + 3600, 'unixepoch'),
r.user_id,
r.choices > 1,
r.correct"
)
setDT(correct_strat)
db_disconnect(db)
Fill in missing rows (where all trials on a day were correct/incorrect):
correct_strat <- tidyr::complete(correct_strat, tidyr::nesting(method, book_info_id, doy, user, mcq), correct, fill = list(n = 0))
setDT(correct_strat)
correct_strat[, mcq := as.logical(mcq)]
Add book information:
db <- db_connect()
book_info <- dbGetQuery(db, "SELECT DISTINCT * FROM 'book_info'")
db_disconnect(db)
setDT(book_info)
correct_strat[book_info[book_type == "Hoofdboek",], on = "book_info_id", c("method_group", "book_title") := .(i.method_group, i.book_title)]
Add sensible course names:
correct_strat[, course := ifelse(method == "Grandes Lignes", "French", ifelse(method == "Stepping Stones", "English", "German"))]
Add a school year column (cutoff date: 1 August):
correct_strat[, doy_posix := as.POSIXct(doy)]
correct_strat[, school_year := ifelse(doy_posix < "2019-08-01", "18/19", "19/20")]
Simplify level names:
# Keep all distinctions
correct_strat[, book_title_simple := stringr::str_sub(book_title, 3, -10)]
correct_strat[, book_title_simple := factor(book_title_simple, levels = c("vmbo b/lwoo", "vmbo b", "vmbo bk", "vmbo k", "vmbo kgt", "vmbo-gt", "vmbo gt", "vmbo-gt/havo", "vmbo (t)hv", "havo", "havo vwo", "vwo"))]
# Simplify to three levels
correct_strat[, level := dplyr::case_when(
grepl( "hv", book_title) ~ "General secondary\n(havo)",
grepl("vmbo", book_title) ~ "Pre-vocational\n(vmbo)",
grepl("havo", book_title) ~ "General secondary\n(havo)",
grepl("vwo", book_title) ~ "Pre-university\n(vwo)",
TRUE ~ "Other")]
correct_strat[, level := factor(level, levels = c("Other", "Pre-vocational\n(vmbo)", "General secondary\n(havo)", "Pre-university\n(vwo)"))]
Simplify year names:
correct_strat[, year := dplyr::case_when(
method_group == "Leerjaar 1 (5e Ed.)" ~ "Year 1",
method_group == "Leerjaar 2 (5e Ed.)" ~ "Year 2",
method_group == "Leerjaar 3 (5e Ed.)" ~ "Year 3",
method_group == "Leerjaar 3/4 (5e Ed.)" ~ "Year 3/4",
method_group == "Leerjaar 4 (5e Ed.)" ~ "Year 4",
method_group == "Tweede Fase (6e Ed.)" ~ "Tweede Fase",
TRUE ~ "Other")]
Consolidate by day:
accuracy_strat <- correct_strat[, .(accuracy = n[correct == 1]/sum(n), n = sum(n)), by = .(school_year, doy_posix, course, level, year, user, mcq)]
Align school years:
accuracy_strat[school_year == "18/19", doy_posix_aligned := as.POSIXct(doy_posix + 365*24*60*60, origin = "1970-01-01")]
accuracy_strat[school_year == "19/20", doy_posix_aligned := doy_posix]
Use cut.Date() to bin dates by week. Each day is assigned the date of the most recent Monday.
accuracy_strat[, doy_posix_week := cut.POSIXt(doy_posix, "week")]
accuracy_strat[, doy_posix_aligned_week := cut.POSIXt(doy_posix_aligned, "week")]
accuracy_strat_by_week_and_user <- accuracy_strat[, .(accuracy = sum(accuracy*n)/sum(n)), by = .(course, school_year, doy_posix_aligned_week, level, year, user, mcq)]
accuracy_strat_by_week <- accuracy_strat_by_week_and_user[, .(accuracy_mean = mean(accuracy, na.rm = TRUE),
accuracy_se = sd(accuracy, na.rm = TRUE)/sqrt(.N), n = .N), by = .(course, school_year, doy_posix_aligned_week, level, year, mcq)]
Add question type labels:
accuracy_strat_by_week_and_user[, question_type := ifelse(mcq == TRUE, "Multiple\nchoice", "Open\nanswer")]
accuracy_strat_by_week[, question_type := ifelse(mcq == TRUE, "Multiple\nchoice", "Open\nanswer")]
How many unique users per group?
accuracy_strat_by_week_and_user[, .(unique_users = length(unique(user))), by = .(course, level, year, school_year, question_type)]
Plot response accuracy by week (mean +/- 1 SE).
p_acc_level_year <- ggplot(accuracy_strat_by_week[course == "French" & level != "Other",],
aes(x = as.POSIXct(doy_posix_aligned_week), y = accuracy_mean, group = interaction(school_year, question_type), colour = school_year, fill = school_year)) +
facet_grid(level ~ year) +
geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = 0, ymax = 1.05, fill = "grey92", colour = "grey50", lty = 2, alpha = .9) +
geom_ribbon(aes(ymin = accuracy_mean - accuracy_se, ymax = accuracy_mean + accuracy_se, colour = NULL), alpha = 0.2) +
geom_line(aes(lty = question_type)) +
scale_x_datetime(expand = c(0, 0),
breaks = as.POSIXct(c(
"2019-10-01 02:00:00 CET",
"2019-12-01 02:00:00 CET",
"2020-02-01 02:00:00 CET",
"2020-04-01 02:00:00 CET",
"2020-06-01 02:00:00 CET")),
limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
date_labels = "%b") +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
coord_cartesian(ylim = c(.4, 1)) +
scale_colour_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
scale_fill_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
labs(x = NULL,
y = "Accuracy",
colour = "School year",
fill = "School year",
lty = "Question type") +
guides(colour = guide_legend(order = 1),
fill = guide_legend(order = 1),
lty = guide_legend(order = 2)) +
theme_paper
p_acc_level_year
Warning: Removed 11 row(s) containing missing values (geom_path).
ggsave("../output/acc_by_question_type_french_level_year.pdf", width = 9, height = 5)
Warning: Removed 11 row(s) containing missing values (geom_path).
ggsave("../output/acc_by_question_type_french_level_year.eps", width = 9, height = 5)
Warning: Removed 11 row(s) containing missing values (geom_path).
Warning in grid.Call.graphics(C_rect, x$x, x$y, x$width, x$height,
resolveHJust(x$just, : semi-transparency is not supported on this device:
reported only once per page
ggsave("../output/acc_by_question_type_french_level_year.png", width = 9, height = 5)
Warning: Removed 11 row(s) containing missing values (geom_path).
p_acc_level_year <- ggplot(accuracy_strat_by_week[course == "English" & level != "Other" & question_type == "Multiple\nchoice",],
aes(x = as.POSIXct(doy_posix_aligned_week), y = accuracy_mean, group = interaction(school_year, question_type), colour = school_year, fill = school_year)) +
facet_grid(level ~ year) +
geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = 0, ymax = 1.05, fill = "grey92", colour = "grey50", lty = 2, alpha = .9) +
geom_ribbon(aes(ymin = accuracy_mean - accuracy_se, ymax = accuracy_mean + accuracy_se, colour = NULL), alpha = 0.2) +
geom_line() +
scale_x_datetime(expand = c(0, 0),
breaks = as.POSIXct(c(
"2019-10-01 02:00:00 CET",
"2019-12-01 02:00:00 CET",
"2020-02-01 02:00:00 CET",
"2020-04-01 02:00:00 CET",
"2020-06-01 02:00:00 CET")),
limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
date_labels = "%b") +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
coord_cartesian(ylim = c(.4, 1)) +
scale_colour_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
scale_fill_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
labs(x = NULL,
y = "Accuracy",
colour = "School year",
fill = "School year",
lty = "Question type") +
guides(colour = guide_legend(order = 1),
fill = guide_legend(order = 1),
lty = guide_legend(order = 2)) +
theme_paper
p_acc_level_year
Warning: Removed 5 row(s) containing missing values (geom_path).
ggsave("../output/acc_by_question_type_english_level_year.pdf", width = 9, height = 5)
Warning: Removed 5 row(s) containing missing values (geom_path).
ggsave("../output/acc_by_question_type_english_level_year.eps", width = 9, height = 5)
Warning: Removed 5 row(s) containing missing values (geom_path).
Warning in grid.Call.graphics(C_rect, x$x, x$y, x$width, x$height,
resolveHJust(x$just, : semi-transparency is not supported on this device:
reported only once per page
ggsave("../output/acc_by_question_type_english_level_year.png", width = 9, height = 5)
Warning: Removed 5 row(s) containing missing values (geom_path).
accuracy_level_by_week_and_user <- accuracy_strat[, .(accuracy = sum(accuracy*n)/sum(n)), by = .(course, school_year, doy_posix_aligned_week, level, user, mcq)]
accuracy_level_by_week <- accuracy_level_by_week_and_user[, .(accuracy_mean = mean(accuracy, na.rm = TRUE),
accuracy_se = sd(accuracy, na.rm = TRUE)/sqrt(.N), n = .N), by = .(course, school_year, doy_posix_aligned_week, level, mcq)]
Add question type labels:
accuracy_level_by_week[, question_type := ifelse(mcq == TRUE, "Multiple\nchoice", "Open\nanswer")]
How many users in each group?
accuracy_level_by_week_and_user[, .(unique_users = length(unique(user))), by = .(course, level, school_year, mcq)]
Plot response accuracy by week (mean +/- 1 SE).
p_acc_level <- ggplot(accuracy_level_by_week[((course == "English" & question_type == "Multiple\nchoice") | course == "French") & level != "Other",],
aes(x = as.POSIXct(doy_posix_aligned_week), y = accuracy_mean, group = interaction(school_year, question_type), colour = school_year, fill = school_year)) +
facet_grid(level ~ course) +
geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = 0, ymax = 1.05, fill = "grey92", colour = "grey50", lty = 2, alpha = .9) +
geom_ribbon(aes(ymin = accuracy_mean - accuracy_se, ymax = accuracy_mean + accuracy_se, colour = NULL), alpha = 0.2) +
geom_line(aes(lty = question_type)) +
scale_x_datetime(expand = c(0, 0),
breaks = as.POSIXct(c(
"2019-10-01 02:00:00 CET",
"2019-12-01 02:00:00 CET",
"2020-02-01 02:00:00 CET",
"2020-04-01 02:00:00 CET",
"2020-06-01 02:00:00 CET")),
limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
date_labels = "%b") +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
coord_cartesian(ylim = c(.6, 1)) +
scale_colour_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
scale_fill_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
labs(x = NULL,
y = "Accuracy",
colour = "School year",
fill = "School year",
lty = "Question type") +
guides(colour = guide_legend(order = 1),
fill = guide_legend(order = 1),
lty = guide_legend(order = 2)) +
theme_paper
p_acc_level
Warning: Removed 20 row(s) containing missing values (geom_path).
ggsave("../output/acc_by_question_type_level.pdf", width = 9, height = 5)
Warning: Removed 20 row(s) containing missing values (geom_path).
ggsave("../output/acc_by_question_type_level.eps", width = 9, height = 5)
Warning: Removed 20 row(s) containing missing values (geom_path).
Warning in grid.Call.graphics(C_rect, x$x, x$y, x$width, x$height,
resolveHJust(x$just, : semi-transparency is not supported on this device:
reported only once per page
ggsave("../output/acc_by_question_type_level.png", width = 9, height = 5)
Warning: Removed 20 row(s) containing missing values (geom_path).
accuracy_year_by_week_and_user <- accuracy_strat[, .(accuracy = sum(accuracy*n)/sum(n)), by = .(course, school_year, doy_posix_aligned_week, year, user, mcq)]
accuracy_year_by_week <- accuracy_year_by_week_and_user[, .(accuracy_mean = mean(accuracy, na.rm = TRUE),
accuracy_se = sd(accuracy, na.rm = TRUE)/sqrt(.N), n = .N), by = .(course, school_year, doy_posix_aligned_week, year, mcq)]
Add question type labels:
accuracy_year_by_week[, question_type := ifelse(mcq == TRUE, "Multiple\nchoice", "Open\nanswer")]
How many users in each group?
accuracy_year_by_week_and_user[, .(unique_users = length(unique(user))), by = .(course, year, school_year, mcq)]
Plot response accuracy by week (mean +/- 1 SE).
p_acc_year <- ggplot(accuracy_year_by_week[((course == "English" & question_type == "Multiple\nchoice") | course == "French") & year != "Other",],
aes(x = as.POSIXct(doy_posix_aligned_week), y = accuracy_mean, group = interaction(school_year, question_type), colour = school_year, fill = school_year)) +
facet_grid(year ~ course) +
geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = 0, ymax = 1.05, fill = "grey92", colour = "grey50", lty = 2, alpha = .9) +
geom_ribbon(aes(ymin = accuracy_mean - accuracy_se, ymax = accuracy_mean + accuracy_se, colour = NULL), alpha = 0.2) +
geom_line(aes(lty = question_type)) +
scale_x_datetime(expand = c(0, 0),
breaks = as.POSIXct(c(
"2019-10-01 02:00:00 CET",
"2019-12-01 02:00:00 CET",
"2020-02-01 02:00:00 CET",
"2020-04-01 02:00:00 CET",
"2020-06-01 02:00:00 CET")),
limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
date_labels = "%b") +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
coord_cartesian(ylim = c(.6, 1)) +
scale_colour_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
scale_fill_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
labs(x = NULL,
y = "Accuracy",
colour = "School year",
fill = "School year",
lty = "Question type") +
guides(colour = guide_legend(order = 1),
fill = guide_legend(order = 1),
lty = guide_legend(order = 2)) +
theme_paper
p_acc_year
Warning: Removed 14 row(s) containing missing values (geom_path).
ggsave("../output/acc_by_question_type_year.pdf", width = 9, height = 5)
Warning: Removed 14 row(s) containing missing values (geom_path).
ggsave("../output/acc_by_question_type_year.eps", width = 9, height = 5)
Warning: Removed 14 row(s) containing missing values (geom_path).
Warning in grid.Call.graphics(C_rect, x$x, x$y, x$width, x$height,
resolveHJust(x$just, : semi-transparency is not supported on this device:
reported only once per page
ggsave("../output/acc_by_question_type_year.png", width = 9, height = 5)
Warning: Removed 14 row(s) containing missing values (geom_path).
Fit a mixed effects model to the daily accuracy data:
accuracy[, period := dplyr::case_when(
doy_posix_aligned >= date_schools_opened ~ "post-lockdown",
doy_posix_aligned >= date_schools_closed & doy_posix_aligned < date_schools_opened ~ "during-lockdown",
doy_posix_aligned < date_schools_opened ~ "pre-lockdown"
)]
# Reorder factor levels so that intercept is pre-lockdown open answer in 19/20
accuracy[, period := factor(period, levels = c("pre-lockdown", "during-lockdown", "post-lockdown"))]
accuracy[, school_year := factor(school_year, levels = c("19/20", "18/19"))]
accuracy[, mcq := factor(mcq, levels = c(FALSE, TRUE))]
Since we know the number of trials per day and the proportion correct (accuracy), we can use a binomial GLMM:
if(!file.exists("../output/m_acc_fit.rds")) {
m_acc <- glmer(accuracy ~ period*school_year*mcq + (1 | user) + (1 | course),
data = accuracy[(course == "English" & mcq == TRUE) | course == "French",],
family = "binomial",
weights = n,
nAGQ = 0,
control = glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 1e6)))
saveRDS(m_acc, "../output/m_acc_fit.rds")
} else {
m_acc <- readRDS("../output/m_acc_fit.rds")
}
m_acc_summary <- summary(m_acc)
m_acc_summary
Generalized linear mixed model fit by maximum likelihood (Adaptive
Gauss-Hermite Quadrature, nAGQ = 0) [glmerMod]
Family: binomial ( logit )
Formula: accuracy ~ period * school_year * mcq + (1 | user) + (1 | course)
Data:
accuracy[(course == "English" & mcq == TRUE) | course == "French", ]
Weights: n
Control:
glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 1e+06))
AIC BIC logLik deviance df.resid
4934770 4934930 -2467371 4934742 668527
Scaled residuals:
Min 1Q Median 3Q Max
-22.1127 -0.9205 0.1614 1.0939 11.4403
Random effects:
Groups Name Variance Std.Dev.
user (Intercept) 0.34971 0.5914
course (Intercept) 0.05504 0.2346
Number of obs: 668541, groups: user, 133419; course, 2
Fixed effects:
Estimate Std. Error
(Intercept) 1.631889 0.165913
periodduring-lockdown 0.264728 0.002760
periodpost-lockdown 0.159475 0.004190
school_year18/19 0.116285 0.002580
mcqTRUE 0.959425 0.002109
periodduring-lockdown:school_year18/19 -0.316933 0.004432
periodpost-lockdown:school_year18/19 -0.251656 0.006437
periodduring-lockdown:mcqTRUE -0.333631 0.002913
periodpost-lockdown:mcqTRUE -0.286879 0.004562
school_year18/19:mcqTRUE 0.004370 0.002681
periodduring-lockdown:school_year18/19:mcqTRUE 0.384829 0.004778
periodpost-lockdown:school_year18/19:mcqTRUE 0.344909 0.007078
z value Pr(>|z|)
(Intercept) 9.836 <2e-16 ***
periodduring-lockdown 95.917 <2e-16 ***
periodpost-lockdown 38.065 <2e-16 ***
school_year18/19 45.073 <2e-16 ***
mcqTRUE 454.977 <2e-16 ***
periodduring-lockdown:school_year18/19 -71.502 <2e-16 ***
periodpost-lockdown:school_year18/19 -39.095 <2e-16 ***
periodduring-lockdown:mcqTRUE -114.525 <2e-16 ***
periodpost-lockdown:mcqTRUE -62.879 <2e-16 ***
school_year18/19:mcqTRUE 1.630 0.103
periodduring-lockdown:school_year18/19:mcqTRUE 80.540 <2e-16 ***
periodpost-lockdown:school_year18/19:mcqTRUE 48.730 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Correlation of Fixed Effects:
(Intr) prddr- prdps- sc_18/19 mcTRUE prdd-:_18/19
prddrng-lck -0.008
prdpst-lckd -0.005 0.336
schl_y18/19 -0.009 0.453 0.290
mcqTRUE -0.010 0.534 0.344 0.596
prdd-:_18/19 0.005 -0.631 -0.212 -0.506 -0.343
prdp-:_18/19 0.003 -0.228 -0.655 -0.347 -0.233 0.239
prddr-:TRUE 0.007 -0.825 -0.271 -0.414 -0.649 0.520
prdps-:TRUE 0.004 -0.263 -0.827 -0.259 -0.408 0.166
s_18/19:TRU 0.007 -0.411 -0.264 -0.838 -0.701 0.451
prdd-:_18/19:TRUE -0.004 0.510 0.167 0.440 0.408 -0.854
prdp-:_18/19:TRUE -0.003 0.175 0.536 0.296 0.272 -0.189
prdp-:_18/19 prdd-:TRUE prdp-:TRUE s_18/19:
prddrng-lck
prdpst-lckd
schl_y18/19
mcqTRUE
prdd-:_18/19
prdp-:_18/19
prddr-:TRUE 0.183
prdps-:TRUE 0.541 0.307
s_18/19:TRU 0.308 0.483 0.303
prdd-:_18/19:TRUE -0.193 -0.617 -0.190 -0.524
prdp-:_18/19:TRUE -0.844 -0.204 -0.647 -0.355
prdd-:_18/19:TRUE
prddrng-lck
prdpst-lckd
schl_y18/19
mcqTRUE
prdd-:_18/19
prdp-:_18/19
prddr-:TRUE
prdps-:TRUE
s_18/19:TRU
prdd-:_18/19:TRUE
prdp-:_18/19:TRUE 0.220
Save coefficients as a table for in the paper:
m_acc_coef <- as.data.frame(m_acc_summary$coefficients)
setDT(m_acc_coef, keep.rownames = TRUE)
m_acc_coef$rn <- c("Intercept \\small{(Period: pre-lockdown, School year: 19/20, Question type: open answer)}",
"Period: lockdown",
"Period: post-lockdown",
"School year: 18/19",
"Question type: multiple choice",
"Period: lockdown $\\times$ School year: 18/19",
"Period: post-lockdown $\\times$ School year: 18/19",
"Period: lockdown $\\times$ Question type: multiple choice",
"Period: post-lockdown $\\times$ Question type: multiple choice",
"School year: 18/19 $\\times$ Question type: multiple choice",
"Period: lockdown $\\times$ School year: 18/19 $\\times$ Question type: multiple choice",
"Period: post-lockdown $\\times$ School year: 18/19 $\\times$ Question type: multiple choice")
# Format p-values
m_acc_coef$`Pr(>|z|)` <- format.pval(m_acc_coef$`Pr(>|z|)`, eps = .001, digits = 3, flag = "0")
m_acc_coef$`Pr(>|z|)` <- sub('^(<)?0[.]', '\\1.', m_acc_coef$`Pr(>|z|)`) # Remove leading zero
cat(knitr::kable(m_acc_coef,
align = c("l","r", "r", "r", "r"),
digits = c(NA, 3, 3, 2, NA),
col.names = c("Effect", "$b$", "SE", "$z$", "$p$"),
format = "latex",
booktabs = TRUE,
escape = FALSE),
file = "../output/m_acc_table.tex")
Visualise the model fit:
acc_fit <- expand.grid(period = c("pre-lockdown", "during-lockdown", "post-lockdown"), school_year = c("18/19", "19/20"), mcq = c(TRUE, FALSE))
acc_fit <- cbind(acc_fit, accuracy = predict(m_acc, type = "response", re.form = NA, newdata = acc_fit))
acc_fit
ggplot(acc_fit, aes(x = period, y = accuracy, colour = school_year, lty = mcq, group = interaction(mcq, school_year))) +
geom_line() +
geom_point() +
scale_y_continuous(limits = c(.7, 1), labels = scales::percent_format()) +
theme_paper
Empirical means:
accuracy_mean <- accuracy[(course == "English" & mcq == TRUE) | course == "French", .(accuracy = sum(accuracy * n)/sum(n)), by = .(period, school_year, mcq, user, course)][, .(accuracy = mean(accuracy), accuracy_sd = sd(accuracy)), by = .(period, school_year, mcq)]
accuracy_mean
ggplot(accuracy_mean, aes(x = period, y = accuracy, colour = school_year, lty = mcq, group = interaction(mcq, school_year))) +
geom_line() +
geom_point() +
scale_y_continuous(limits = c(.7, 1), labels = scales::percent_format()) +
theme_paper
db <- db_connect()
rt <- dbGetQuery(db,
"SELECT r.method AS 'method',
r.book_info_id AS 'book_info_id',
DATE(r.date + 3600, 'unixepoch') AS 'doy',
r.user_id AS 'user',
r.choices > 1 AS 'mcq',
r.rt AS 'rt'
FROM 'responses_noduplicates' r
WHERE r.study == 0
AND r.correct == 1
"
)
setDT(rt)
db_disconnect(db)
doys <- rt[, .(doy = unique(doy))][, doy_posix := as.POSIXct(doy)][]
doys[, doy_posix_week := cut.POSIXt(as.POSIXct(doy), "week")]
doys[, school_year := ifelse(doy_posix < "2019-08-01", "18/19", "19/20")]
doys[school_year == "18/19", doy_posix_aligned := as.POSIXct(doy_posix + 365*24*60*60, origin = "1970-01-01")]
doys[school_year == "19/20", doy_posix_aligned := doy_posix]
doys[, doy_posix_aligned_week := cut.POSIXt(doy_posix_aligned, "week")]
doys[, period := dplyr::case_when(
doy_posix_aligned >= date_schools_opened ~ "post-lockdown",
doy_posix_aligned >= date_schools_closed & doy_posix_aligned < date_schools_opened ~ "during-lockdown",
doy_posix_aligned < date_schools_opened ~ "pre-lockdown"
)]
# Reorder factor levels so that intercept is pre-lockdown in 19/20
doys[, period := factor(period, levels = c("pre-lockdown", "during-lockdown", "post-lockdown"))]
doys[, school_year := factor(school_year, levels = c("19/20", "18/19"))]
rt <- rt[doys, on = "doy"]
rt[, mcq := as.factor(as.logical(mcq))]
rt[, course := ifelse(method == "Grandes Lignes", "French", ifelse(method == "Stepping Stones", "English", "German"))]
Throw out trials with negative RTs (timing errors)
rt <- rt[rt > 0]
rt_med <- rt[, .(rt_median = median(rt)), by = .(school_year, mcq, user, course, doy_posix_week)]
rt_by_week <- rt_med[, .(rt = mean(rt_median), rt_se = sd(rt_median)/sqrt(.N)), by = .(school_year, mcq, course, doy_posix_week)]
Overlap the two school years:
rt_by_week[school_year == "18/19", doy_posix_week_aligned := as.POSIXct(as.POSIXct(doy_posix_week) + 365*24*60*60, origin = "1970-01-01")]
rt_by_week[school_year == "19/20", doy_posix_week_aligned := as.POSIXct(doy_posix_week)]
Add question type labels:
rt_by_week[, question_type := ifelse(mcq == TRUE, "Multiple\nchoice", "Open\nanswer")]
rt_by_week[, school_year := factor(school_year, levels = c("18/19", "19/20"))]
Plot response time by week (mean +/- 1 SE).
p_rt <- ggplot(rt_by_week[(course == "English" & mcq == TRUE) | course == "French",],
aes(x = doy_posix_week_aligned, y = rt/1e3, group = interaction(school_year, question_type), colour = school_year, fill = school_year)) +
facet_grid(. ~ course) +
geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = 0, ymax = 1000, fill = "grey92", colour = "grey50", lty = 2, alpha = .9) +
geom_ribbon(aes(ymin = rt/1e3 - rt_se/1e3, ymax = rt/1e3 + rt_se/1e3, colour = NULL), alpha = 0.2) +
geom_line(aes(lty = question_type)) +
scale_x_datetime(expand = c(0, 0),
breaks = as.POSIXct(c(
"2019-10-01 02:00:00 CET",
"2019-12-01 02:00:00 CET",
"2020-02-01 02:00:00 CET",
"2020-04-01 02:00:00 CET",
"2020-06-01 02:00:00 CET")),
limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
date_labels = "%b") +
scale_y_continuous(labels = scales::unit_format(unit = "s", accuracy = .1)) +
coord_cartesian(ylim = c(1.7, 3.7)) +
scale_colour_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
scale_fill_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
labs(x = NULL,
y = "Response time",
colour = "School year",
fill = "School year",
lty = "Question type") +
guides(colour = guide_legend(order = 1),
fill = guide_legend(order = 1),
lty = guide_legend(order = 2)) +
theme_paper
p_rt
Warning: Removed 24 row(s) containing missing values (geom_path).
ggsave("../output/rt_by_question_type.pdf", width = 9, height = 3)
Warning: Removed 24 row(s) containing missing values (geom_path).
ggsave("../output/rt_by_question_type.eps", width = 9, height = 3)
Warning: Removed 24 row(s) containing missing values (geom_path).
Warning in grid.Call.graphics(C_rect, x$x, x$y, x$width, x$height,
resolveHJust(x$just, : semi-transparency is not supported on this device:
reported only once per page
ggsave("../output/rt_by_question_type.png", width = 9, height = 3)
Warning: Removed 24 row(s) containing missing values (geom_path).
Add book info:
rt[book_info[book_type == "Hoofdboek",], on = "book_info_id", c("method_group", "book_title") := .(i.method_group, i.book_title)]
Simplify level names:
# Keep all distinctions
rt[, book_title_simple := stringr::str_sub(book_title, 3, -10)]
rt[, book_title_simple := factor(book_title_simple, levels = c("vmbo b/lwoo", "vmbo b", "vmbo bk", "vmbo k", "vmbo kgt", "vmbo-gt", "vmbo gt", "vmbo-gt/havo", "vmbo (t)hv", "havo", "havo vwo", "vwo"))]
# Simplify to three levels
rt[, level := dplyr::case_when(
grepl( "hv", book_title) ~ "General secondary\n(havo)",
grepl("vmbo", book_title) ~ "Pre-vocational\n(vmbo)",
grepl("havo", book_title) ~ "General secondary\n(havo)",
grepl("vwo", book_title) ~ "Pre-university\n(vwo)",
TRUE ~ "Other")]
rt[, level := factor(level, levels = c("Other", "Pre-vocational\n(vmbo)", "General secondary\n(havo)", "Pre-university\n(vwo)"))]
Simplify year names:
rt[, year := dplyr::case_when(
method_group == "Leerjaar 1 (5e Ed.)" ~ "Year 1",
method_group == "Leerjaar 2 (5e Ed.)" ~ "Year 2",
method_group == "Leerjaar 3 (5e Ed.)" ~ "Year 3",
method_group == "Leerjaar 3/4 (5e Ed.)" ~ "Year 3/4",
method_group == "Leerjaar 4 (5e Ed.)" ~ "Year 4",
method_group == "Tweede Fase (6e Ed.)" ~ "Tweede Fase",
TRUE ~ "Other")]
rt_strat_med <- rt[, .(rt_median = median(rt)), by = .(school_year, mcq, user, course, level, year, doy_posix_week)]
rt_strat_by_week <- rt_strat_med[, .(rt = mean(rt_median), rt_se = sd(rt_median)/sqrt(.N)), by = .(school_year, mcq, course, level, year, doy_posix_week)]
Overlap the two school years:
rt_strat_by_week[school_year == "18/19", doy_posix_week_aligned := as.POSIXct(as.POSIXct(doy_posix_week) + 365*24*60*60, origin = "1970-01-01")]
rt_strat_by_week[school_year == "19/20", doy_posix_week_aligned := as.POSIXct(doy_posix_week)]
Add question type labels:
rt_strat_by_week[, question_type := ifelse(mcq == TRUE, "Multiple\nchoice", "Open\nanswer")]
rt_strat_by_week[, school_year := factor(school_year, levels = c("18/19", "19/20"))]
Plot response time by week (mean +/- 1 SE).
p_rt_level_year <- ggplot(rt_strat_by_week[course == "French",],
aes(x = doy_posix_week_aligned, y = rt/1e3, group = interaction(school_year, question_type), colour = school_year, fill = school_year)) +
facet_grid(level ~ year) +
geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = 0, ymax = 1000, fill = "grey92", colour = "grey50", lty = 2, alpha = .9) +
geom_ribbon(aes(ymin = rt/1e3 - rt_se/1e3, ymax = rt/1e3 + rt_se/1e3, colour = NULL), alpha = 0.2) +
geom_line(aes(lty = question_type)) +
scale_x_datetime(expand = c(0, 0),
breaks = as.POSIXct(c(
"2019-10-01 02:00:00 CET",
"2019-12-01 02:00:00 CET",
"2020-02-01 02:00:00 CET",
"2020-04-01 02:00:00 CET",
"2020-06-01 02:00:00 CET")),
limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
date_labels = "%b") +
scale_y_continuous(labels = scales::unit_format(unit = "s", accuracy = .1)) +
coord_cartesian(ylim = c(1, 4)) +
scale_colour_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
scale_fill_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
labs(x = NULL,
y = "Response time",
colour = "School year",
fill = "School year",
lty = "Question type") +
guides(colour = guide_legend(order = 1),
fill = guide_legend(order = 1),
lty = guide_legend(order = 2)) +
theme_paper
p_rt_level_year
Warning: Removed 11 row(s) containing missing values (geom_path).
ggsave("../output/rt_by_question_type_french_level_year.pdf", width = 9, height = 3)
Warning: Removed 11 row(s) containing missing values (geom_path).
ggsave("../output/rt_by_question_type_french_level_year.eps", width = 9, height = 3)
Warning: Removed 11 row(s) containing missing values (geom_path).
Warning in grid.Call.graphics(C_rect, x$x, x$y, x$width, x$height,
resolveHJust(x$just, : semi-transparency is not supported on this device:
reported only once per page
ggsave("../output/rt_by_question_type_french_level_year.png", width = 9, height = 3)
Warning: Removed 11 row(s) containing missing values (geom_path).
p_rt_level_year <- ggplot(rt_strat_by_week[course == "English" & question_type == "Multiple\nchoice" & level != "Other",],
aes(x = doy_posix_week_aligned, y = rt/1e3, group = interaction(school_year, question_type), colour = school_year, fill = school_year)) +
facet_grid(level ~ year) +
geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = 0, ymax = 1000, fill = "grey92", colour = "grey50", lty = 2, alpha = .9) +
geom_ribbon(aes(ymin = rt/1e3 - rt_se/1e3, ymax = rt/1e3 + rt_se/1e3, colour = NULL), alpha = 0.2) +
geom_line(aes(lty = question_type)) +
scale_x_datetime(expand = c(0, 0),
breaks = as.POSIXct(c(
"2019-10-01 02:00:00 CET",
"2019-12-01 02:00:00 CET",
"2020-02-01 02:00:00 CET",
"2020-04-01 02:00:00 CET",
"2020-06-01 02:00:00 CET")),
limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
date_labels = "%b") +
scale_y_continuous(labels = scales::unit_format(unit = "s", accuracy = .1)) +
coord_cartesian(ylim = c(1, 4)) +
scale_colour_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
scale_fill_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
labs(x = NULL,
y = "Response time",
colour = "School year",
fill = "School year",
lty = "Question type") +
guides(colour = guide_legend(order = 1),
fill = guide_legend(order = 1),
lty = guide_legend(order = 2)) +
theme_paper
p_rt_level_year
Warning: Removed 5 row(s) containing missing values (geom_path).
ggsave("../output/rt_by_question_type_english_level_year.pdf", width = 9, height = 3)
Warning: Removed 5 row(s) containing missing values (geom_path).
ggsave("../output/rt_by_question_type_english_level_year.eps", width = 9, height = 3)
Warning: Removed 5 row(s) containing missing values (geom_path).
Warning in grid.Call.graphics(C_rect, x$x, x$y, x$width, x$height,
resolveHJust(x$just, : semi-transparency is not supported on this device:
reported only once per page
ggsave("../output/rt_by_question_type_english_level_year.png", width = 9, height = 3)
Warning: Removed 5 row(s) containing missing values (geom_path).
rt_level_med <- rt[, .(rt_median = median(rt)), by = .(school_year, mcq, user, course, level, doy_posix_week)]
rt_level_by_week <- rt_level_med[, .(rt = mean(rt_median), rt_se = sd(rt_median)/sqrt(.N)), by = .(school_year, mcq, course, level, doy_posix_week)]
Overlap the two school years:
rt_level_by_week[school_year == "18/19", doy_posix_week_aligned := as.POSIXct(as.POSIXct(doy_posix_week) + 365*24*60*60, origin = "1970-01-01")]
rt_level_by_week[school_year == "19/20", doy_posix_week_aligned := as.POSIXct(doy_posix_week)]
Add question type labels:
rt_level_by_week[, question_type := ifelse(mcq == TRUE, "Multiple\nchoice", "Open\nanswer")]
rt_level_by_week[, school_year := factor(school_year, levels = c("18/19", "19/20"))]
Plot response time by week (mean +/- 1 SE).
p_rt_level <- ggplot(rt_level_by_week[((course == "English" & question_type == "Multiple\nchoice") | course == "French") & level != "Other",],
aes(x = doy_posix_week_aligned, y = rt/1e3, group = interaction(school_year, question_type), colour = school_year, fill = school_year)) +
facet_grid(level ~ course) +
geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = 0, ymax = 1000, fill = "grey92", colour = "grey50", lty = 2, alpha = .9) +
geom_ribbon(aes(ymin = rt/1e3 - rt_se/1e3, ymax = rt/1e3 + rt_se/1e3, colour = NULL), alpha = 0.2) +
geom_line(aes(lty = question_type)) +
scale_x_datetime(expand = c(0, 0),
breaks = as.POSIXct(c(
"2019-10-01 02:00:00 CET",
"2019-12-01 02:00:00 CET",
"2020-02-01 02:00:00 CET",
"2020-04-01 02:00:00 CET",
"2020-06-01 02:00:00 CET")),
limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
date_labels = "%b") +
scale_y_continuous(labels = scales::unit_format(unit = "s", accuracy = .1)) +
coord_cartesian(ylim = c(1, 6)) +
scale_colour_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
scale_fill_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
labs(x = NULL,
y = "Response time",
colour = "School year",
fill = "School year",
lty = "Question type") +
guides(colour = guide_legend(order = 1),
fill = guide_legend(order = 1),
lty = guide_legend(order = 2)) +
theme_paper
p_rt_level
Warning: Removed 20 row(s) containing missing values (geom_path).
ggsave("../output/rt_by_question_type_level.pdf", width = 9, height = 5)
Warning: Removed 20 row(s) containing missing values (geom_path).
ggsave("../output/rt_by_question_type_level.eps", width = 9, height = 5)
Warning: Removed 20 row(s) containing missing values (geom_path).
Warning in grid.Call.graphics(C_rect, x$x, x$y, x$width, x$height,
resolveHJust(x$just, : semi-transparency is not supported on this device:
reported only once per page
ggsave("../output/rt_by_question_type_level.png", width = 9, height = 5)
Warning: Removed 20 row(s) containing missing values (geom_path).
rt_year_med <- rt[, .(rt_median = median(rt)), by = .(school_year, mcq, user, course, year, doy_posix_week)]
rt_year_by_week <- rt_year_med[, .(rt = mean(rt_median), rt_se = sd(rt_median)/sqrt(.N)), by = .(school_year, mcq, course, year, doy_posix_week)]
Overlap the two school years:
rt_year_by_week[school_year == "18/19", doy_posix_week_aligned := as.POSIXct(as.POSIXct(doy_posix_week) + 365*24*60*60, origin = "1970-01-01")]
rt_year_by_week[school_year == "19/20", doy_posix_week_aligned := as.POSIXct(doy_posix_week)]
Add question type labels:
rt_year_by_week[, question_type := ifelse(mcq == TRUE, "Multiple\nchoice", "Open\nanswer")]
rt_year_by_week[, school_year := factor(school_year, levels = c("18/19", "19/20"))]
Plot response time by week (mean +/- 1 SE).
p_rt_year <- ggplot(rt_year_by_week[((course == "English" & question_type == "Multiple\nchoice") | course == "French") & year != "Other",],
aes(x = doy_posix_week_aligned, y = rt/1e3, group = interaction(school_year, question_type), colour = school_year, fill = school_year)) +
facet_grid(year ~ course) +
geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = 0, ymax = 1000, fill = "grey92", colour = "grey50", lty = 2, alpha = .9) +
geom_ribbon(aes(ymin = rt/1e3 - rt_se/1e3, ymax = rt/1e3 + rt_se/1e3, colour = NULL), alpha = 0.2) +
geom_line(aes(lty = question_type)) +
scale_x_datetime(expand = c(0, 0),
breaks = as.POSIXct(c(
"2019-10-01 02:00:00 CET",
"2019-12-01 02:00:00 CET",
"2020-02-01 02:00:00 CET",
"2020-04-01 02:00:00 CET",
"2020-06-01 02:00:00 CET")),
limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
date_labels = "%b") +
scale_y_continuous(labels = scales::unit_format(unit = "s", accuracy = .1)) +
coord_cartesian(ylim = c(1, 4)) +
scale_colour_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
scale_fill_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
labs(x = NULL,
y = "Response time",
colour = "School year",
fill = "School year",
lty = "Question type") +
guides(colour = guide_legend(order = 1),
fill = guide_legend(order = 1),
lty = guide_legend(order = 2)) +
theme_paper
p_rt_year
Warning: Removed 14 row(s) containing missing values (geom_path).
ggsave("../output/rt_by_question_type_year.pdf", width = 9, height = 5)
Warning: Removed 14 row(s) containing missing values (geom_path).
ggsave("../output/rt_by_question_type_year.eps", width = 9, height = 5)
Warning: Removed 14 row(s) containing missing values (geom_path).
Warning in grid.Call.graphics(C_rect, x$x, x$y, x$width, x$height,
resolveHJust(x$just, : semi-transparency is not supported on this device:
reported only once per page
ggsave("../output/rt_by_question_type_year.png", width = 9, height = 5)
Warning: Removed 14 row(s) containing missing values (geom_path).
rt_model_dat <- rt[, .(rt_median = median(rt)), by = .(course, school_year, period, doy_posix, mcq, user)]
Fit a generalised linear mixed effects model (assuming a Gamma distribution for RT and an identity link function; Lo & Andrew, 2015) to the daily median RT:
if(!file.exists("../output/m_rt_fit.rds")) {
m_rt <- glmer(rt_median ~ period*school_year*mcq + (1 | user) + (1 | course),
data = rt_model_dat[(course == "English" & mcq == TRUE) | course == "French",],
family = Gamma(link = "identity"),
nAGQ = 0,
control = glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 1e6)))
saveRDS(m_rt, "../output/m_rt_fit.rds")
} else {
m_rt <- readRDS("../output/m_rt_fit.rds")
}
m_rt_summary <- summary(m_rt)
m_rt_summary
Generalized linear mixed model fit by maximum likelihood (Adaptive
Gauss-Hermite Quadrature, nAGQ = 0) [glmerMod]
Family: Gamma ( identity )
Formula:
rt_median ~ period * school_year * mcq + (1 | user) + (1 | course)
Data: rt_model_dat[(course == "English" & mcq == TRUE) | course ==
"French", ]
Control:
glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 1e+06))
AIC BIC logLik deviance df.resid
10294902 10295073 -5147436 10294872 666488
Scaled residuals:
Min 1Q Median 3Q Max
-2.667 -0.337 -0.069 0.235 205.150
Random effects:
Groups Name Variance Std.Dev.
user (Intercept) 2.935e+05 541.7736
course (Intercept) 2.753e+02 16.5917
Residual 1.390e-01 0.3728
Number of obs: 666503, groups: user, 133398; course, 2
Fixed effects:
Estimate Std. Error t value
(Intercept) 2134.021 12.889 165.574
periodduring-lockdown 208.799 7.148 29.209
periodpost-lockdown 197.658 11.085 17.831
school_year18/19 27.350 6.744 4.055
mcqTRUE 150.272 5.544 27.108
periodduring-lockdown:school_year18/19 -281.173 11.675 -24.084
periodpost-lockdown:school_year18/19 -293.256 17.615 -16.648
periodduring-lockdown:mcqTRUE -211.632 7.688 -27.526
periodpost-lockdown:mcqTRUE -200.943 12.213 -16.454
school_year18/19:mcqTRUE -61.876 7.084 -8.734
periodduring-lockdown:school_year18/19:mcqTRUE 279.138 12.590 22.171
periodpost-lockdown:school_year18/19:mcqTRUE 316.380 19.331 16.366
Pr(>|z|)
(Intercept) < 2e-16 ***
periodduring-lockdown < 2e-16 ***
periodpost-lockdown < 2e-16 ***
school_year18/19 5.01e-05 ***
mcqTRUE < 2e-16 ***
periodduring-lockdown:school_year18/19 < 2e-16 ***
periodpost-lockdown:school_year18/19 < 2e-16 ***
periodduring-lockdown:mcqTRUE < 2e-16 ***
periodpost-lockdown:mcqTRUE < 2e-16 ***
school_year18/19:mcqTRUE < 2e-16 ***
periodduring-lockdown:school_year18/19:mcqTRUE < 2e-16 ***
periodpost-lockdown:school_year18/19:mcqTRUE < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Correlation of Fixed Effects:
(Intr) prddr- prdps- sc_18/19 mcTRUE prdd-:_18/19
prddrng-lck -0.261
prdpst-lckd -0.165 0.330
schl_y18/19 -0.289 0.484 0.308
mcqTRUE -0.349 0.567 0.363 0.621
prdd-:_18/19 0.166 -0.619 -0.204 -0.533 -0.354
prdp-:_18/19 0.109 -0.214 -0.633 -0.354 -0.234 0.228
prddr-:TRUE 0.236 -0.862 -0.278 -0.436 -0.651 0.532
prdps-:TRUE 0.147 -0.272 -0.856 -0.273 -0.408 0.168
s_18/19:TRU 0.256 -0.441 -0.281 -0.880 -0.705 0.484
prdd-:_18/19:TRUE -0.148 0.531 0.171 0.475 0.404 -0.883
prdp-:_18/19:TRUE -0.096 0.176 0.543 0.310 0.263 -0.191
prdp-:_18/19 prdd-:TRUE prdp-:TRUE s_18/19:
prddrng-lck
prdpst-lckd
schl_y18/19
mcqTRUE
prdd-:_18/19
prdp-:_18/19
prddr-:TRUE 0.179
prdps-:TRUE 0.541 0.301
s_18/19:TRU 0.321 0.491 0.307
prdd-:_18/19:TRUE -0.194 -0.615 -0.186 -0.539
prdp-:_18/19:TRUE -0.873 -0.194 -0.634 -0.353
prdd-:_18/19:TRUE
prddrng-lck
prdpst-lckd
schl_y18/19
mcqTRUE
prdd-:_18/19
prdp-:_18/19
prddr-:TRUE
prdps-:TRUE
s_18/19:TRU
prdd-:_18/19:TRUE
prdp-:_18/19:TRUE 0.212
Save coefficients as a table for in the paper:
m_rt_coef <- as.data.frame(m_rt_summary$coefficients)
setDT(m_rt_coef, keep.rownames = TRUE)
m_rt_coef$rn <- c("Intercept \\small{(Period: pre-lockdown, School year: 19/20, Question type: open answer)}",
"Period: lockdown",
"Period: post-lockdown",
"School year: 18/19",
"Question type: multiple choice",
"Period: lockdown $\\times$ School year: 18/19",
"Period: post-lockdown $\\times$ School year: 18/19",
"Period: lockdown $\\times$ Question type: multiple choice",
"Period: post-lockdown $\\times$ Question type: multiple choice",
"School year: 18/19 $\\times$ Question type: multiple choice",
"Period: lockdown $\\times$ School year: 18/19 $\\times$ Question type: multiple choice",
"Period: post-lockdown $\\times$ School year: 18/19 $\\times$ Question type: multiple choice")
# Format p-values
m_rt_coef$`Pr(>|z|)` <- format.pval(m_rt_coef$`Pr(>|z|)`, eps = .001, digits = 3, flag = "0")
m_rt_coef$`Pr(>|z|)` <- sub('^(<)?0[.]', '\\1.', m_rt_coef$`Pr(>|z|)`) # Remove leading zero
cat(knitr::kable(m_rt_coef,
align = c("l","r", "r", "r", "r"),
digits = c(NA, 3, 3, 2, NA),
col.names = c("Effect", "$b$", "SE", "$z$", "$p$"),
format = "latex",
booktabs = TRUE,
escape = FALSE),
file = "../output/m_rt_table.tex")
Visualise the model fit:
rt_fit <- expand.grid(period = c("pre-lockdown", "during-lockdown", "post-lockdown"), school_year = c("18/19", "19/20"), mcq = c(TRUE, FALSE))
rt_fit <- cbind(rt_fit, rt = predict(m_rt, type = "response", re.form = NA, newdata = rt_fit))
rt_fit
ggplot(rt_fit, aes(x = period, y = rt, colour = school_year, lty = mcq, group = interaction(mcq, school_year))) +
geom_line() +
geom_point() +
scale_y_continuous(limits = c(1500, 4000)) +
theme_paper
Empirical means:
rt_mean <- rt_model_dat[(course == "English" & mcq == TRUE) | course == "French", .(rt = mean(rt_median)), by = .(period, school_year, mcq, user, course)][, .(rt = mean(rt), rt_sd = sd(rt)), by = .(period, school_year, mcq, course)]
rt_mean[, school_year := factor(school_year, levels = c("18/19", "19/20"))]
rt_mean
ggplot(rt_mean, aes(x = period, y = rt, colour = school_year, lty = mcq, group = interaction(mcq, school_year))) +
facet_grid(~ course) +
geom_line() +
geom_point() +
scale_y_continuous(limits = c(1500, 3000)) +
theme_paper
p_legend <- get_legend(p_acc)
Warning: Removed 24 row(s) containing missing values (geom_path).
p_acc <- p_acc +
guides(colour = FALSE, fill = FALSE, lty = FALSE)
p_rt <- p_rt +
guides(colour = FALSE, fill = FALSE, lty = FALSE)
Combine plots:
plot_grid(
plot_grid(p_acc, p_rt,
ncol = 1,
labels = c("A", "B")),
p_legend,
rel_widths = c(1, .2)
)
Warning: Removed 24 row(s) containing missing values (geom_path).
Warning: Removed 24 row(s) containing missing values (geom_path).
ggsave("../output/combi_acc_rt.pdf", width = 9, height = 3.5)
ggsave("../output/combi_acc_rt.eps", width = 9, height = 3.5)
Warning in grid.Call.graphics(C_rect, x$x, x$y, x$width, x$height,
resolveHJust(x$just, : semi-transparency is not supported on this device:
reported only once per page
ggsave("../output/combi_acc_rt.png", width = 9, height = 3.5)
Get the unique book chapter IDs on each day:
db <- db_connect()
progress <- dbGetQuery(db,
"SELECT DISTINCT r.book_info_id AS 'book_info_id',
r.method AS 'method',
DATE(r.date + 3600, 'unixepoch') AS 'doy',
COUNT(*) AS 'trials'
FROM 'responses_noduplicates' r
GROUP BY r.method,
r.book_info_id,
DATE(r.date + 3600, 'unixepoch');"
)
db_disconnect(db)
setDT(progress)
Join with the book chapter information:
db <- db_connect()
book_info <- dbGetQuery(db, "SELECT DISTINCT * FROM 'book_info'")
db_disconnect(db)
setDT(book_info)
progress[book_info[book_type == "Hoofdboek",], on = "book_info_id", c("method_group", "book_title", "chapter") := .(i.method_group, i.book_title, i.chapter)]
Add sensible course names:
progress[, course := ifelse(method == "Grandes Lignes", "French", ifelse(method == "Stepping Stones", "English", "German"))]
Add a school year column (cutoff date: 1 August):
progress[, doy_posix := as.POSIXct(doy)]
progress[, school_year := ifelse(doy_posix < "2019-08-01", "18/19", "19/20")]
Consolidate count by day and chapter:
progress_by_day <- progress[, .(trials = sum(trials)), by = .(school_year, doy_posix, course, method_group, book_title, chapter)]
Simplify level names:
# Keep all distinctions
progress_by_day[, book_title_simple := stringr::str_sub(book_title, 3, -10)]
progress_by_day[, book_title_simple := factor(book_title_simple, levels = c("vmbo b/lwoo", "vmbo b", "vmbo bk", "vmbo k", "vmbo kgt", "vmbo-gt", "vmbo gt", "vmbo-gt/havo", "vmbo (t)hv", "havo", "havo vwo", "vwo"))]
# Simplify to three levels
progress_by_day[, level := dplyr::case_when(
grepl( "hv", book_title) ~ "General secondary (havo)",
grepl("vmbo", book_title) ~ "Pre-vocational (vmbo)",
grepl("havo", book_title) ~ "General secondary (havo)",
grepl("vwo", book_title) ~ "Pre-university (vwo)",
TRUE ~ "Other")]
progress_by_day[, level := factor(level, levels = c("Other", "Pre-vocational (vmbo)", "General secondary (havo)", "Pre-university (vwo)"))]
Simplify year names:
progress_by_day[, year := dplyr::case_when(
method_group == "Leerjaar 1 (5e Ed.)" ~ "Year 1",
method_group == "Leerjaar 2 (5e Ed.)" ~ "Year 2",
method_group == "Leerjaar 3 (5e Ed.)" ~ "Year 3",
method_group == "Leerjaar 3/4 (5e Ed.)" ~ "Year 3/4",
method_group == "Leerjaar 4 (5e Ed.)" ~ "Year 4",
method_group == "Tweede Fase (6e Ed.)" ~ "Tweede Fase",
TRUE ~ "Other")]
Simplify chapter names:
# In most cases, the chapter name starts with a number
progress_by_day[, chapter_simple := factor(as.numeric(stringr::str_extract(chapter, "^\\d{1,2}")))]
# Remaining cases:
unique(progress_by_day[is.na(chapter_simple),]$chapter)
[1] "BS2 Dienstleistung"
[2] "BS5 Reisen"
[3] "BS1 Familie und Beziehungen"
[4] "BS3 Dienstleistung"
[5] "BS4 Reisen und Verkehr"
[6] "Lernliste Brückenschlag"
[7] "BS2 Freizeit"
[8] "BS1 Schule und Ausbildung"
[9] "Bridging the Gap Year 2"
[10] "Bridging the Gap Year 1"
[11] "Bridging the Gap Exam Preparation"
[12] "Exam Preparation"
[13] "Bridging the Gap mbo"
[14] "Bridging the Gap havo"
BS2 Dienstleistung
BS5 Reisen
BS1 Familie und Beziehungen
BS3 Dienstleistung
BS4 Reisen und Verkehr
Lernliste Brückenschlag
BS2 Freizeit
BS1 Schule und Ausbildung
Bridging the Gap Year 2
Bridging the Gap Year 1
Bridging the Gap Exam Preparation
Exam Preparation
Bridging the Gap mbo
Bridging the Gap havo
# Combine these chapters into an "other" category
progress_by_day[is.na(chapter_simple), chapter_simple := "O"]
Align school years:
progress_by_day[school_year == "18/19", doy_posix_aligned := as.POSIXct(doy_posix + 365*24*60*60, origin = "1970-01-01")]
progress_by_day[school_year == "19/20", doy_posix_aligned := doy_posix]
Use cut.Date() to bin dates by week and month. Each day is assigned the date of the most recent Monday.
progress_by_day[, doy_posix_aligned_week := cut.POSIXt(doy_posix_aligned, "week")]
progress_by_day[, doy_posix_aligned_month := cut.POSIXt(doy_posix_aligned, "month")]
Calculate proportions by week and month:
progress_by_week <- progress_by_day[, .(trials = sum(trials)), by = .(school_year, doy_posix_aligned_week, course, level, year, chapter_simple)]
progress_by_week[, prop := trials/sum(trials), by = .(school_year, doy_posix_aligned_week, course, level, year)]
progress_by_month <- progress_by_day[, .(trials = sum(trials)), by = .(school_year, doy_posix_aligned_month, course, level, year, chapter_simple)]
progress_by_month[, prop := trials/sum(trials), by = .(school_year, doy_posix_aligned_month, course, level, year)]
setorder(progress_by_week, chapter_simple)
setorder(progress_by_month, chapter_simple)
p_french_y1 <- ggplot(progress_by_week[course == "French" & year == "Year 1"], aes(x = as.POSIXct(doy_posix_aligned_week), y = prop, fill = chapter_simple, group = school_year)) +
facet_grid(school_year ~ level) +
geom_col(alpha = 0.75, width = 7*24*60*60, colour = NA) +
geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = -0.01, ymax = 1.01, fill = NA, colour = "black", lty = 2) +
scale_x_datetime(expand = c(0, 0),
breaks = as.POSIXct(c(
"2019-10-01 02:00:00 CET",
"2019-12-01 02:00:00 CET",
"2020-02-01 02:00:00 CET",
"2020-04-01 02:00:00 CET",
"2020-06-01 02:00:00 CET")),
limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
date_labels = "%b") +
scale_y_continuous(labels = scales::percent_format(), limits = c(0,1), breaks = c(0, .5 , 1)) +
scale_fill_viridis_d(direction = -1, na.translate = FALSE) +
guides(fill = guide_legend(ncol = 2)) +
labs(x = NULL,
y = "Share of trials",
fill = "Chapter") +
theme_paper
p_french_y2 <- ggplot(progress_by_week[course == "French" & year == "Year 2"], aes(x = as.POSIXct(doy_posix_aligned_week), y = prop, fill = chapter_simple, group = school_year)) +
facet_grid(school_year ~ level) +
geom_col(alpha = 0.75, width = 7*24*60*60, colour = NA) +
geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = -0.01, ymax = 1.01, fill = NA, colour = "black", lty = 2) +
scale_x_datetime(expand = c(0, 0),
breaks = as.POSIXct(c(
"2019-10-01 02:00:00 CET",
"2019-12-01 02:00:00 CET",
"2020-02-01 02:00:00 CET",
"2020-04-01 02:00:00 CET",
"2020-06-01 02:00:00 CET")),
limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
date_labels = "%b") +
scale_y_continuous(labels = scales::percent_format(), limits = c(0,1), breaks = c(0, .5 , 1)) +
scale_fill_viridis_d(direction = -1, na.translate = FALSE) +
guides(fill = guide_legend(ncol = 2)) +
labs(x = NULL,
y = "Share of trials",
fill = "Chapter") +
theme_paper
p_french_y3 <- ggplot(progress_by_week[course == "French" & year == "Year 3/4"], aes(x = as.POSIXct(doy_posix_aligned_week), y = prop, fill = chapter_simple, group = school_year)) +
facet_grid(school_year ~ level) +
geom_col(alpha = 0.75, width = 7*24*60*60, colour = NA) +
geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = -0.01, ymax = 1.01, fill = NA, colour = "black", lty = 2) +
scale_x_datetime(expand = c(0, 0),
breaks = as.POSIXct(c(
"2019-10-01 02:00:00 CET",
"2019-12-01 02:00:00 CET",
"2020-02-01 02:00:00 CET",
"2020-04-01 02:00:00 CET",
"2020-06-01 02:00:00 CET")),
limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
date_labels = "%b") +
scale_y_continuous(labels = scales::percent_format(), limits = c(0,1), breaks = c(0, .5 , 1)) +
scale_fill_viridis_d(direction = -1, na.translate = FALSE) +
guides(fill = guide_legend(ncol = 2)) +
labs(x = NULL,
y = "Share of trials",
fill = "Chapter") +
theme_paper
p_progress_french <- plot_grid(p_french_y1, p_french_y2, p_french_y3,
ncol = 1,
align = "hv", axis = "tblr",
labels = c("Year 1", "Year 2", "Year 3/4"),
hjust = -0.1,
scale = .95)
Warning: Removed 48 rows containing missing values (position_stack).
Warning: Removed 40 rows containing missing values (geom_col).
Warning: Removed 46 rows containing missing values (position_stack).
Warning: Removed 41 rows containing missing values (geom_col).
Warning: Removed 24 rows containing missing values (position_stack).
Warning: Removed 23 rows containing missing values (geom_col).
p_progress_french
ggsave("../output/progress_french.pdf", width = 9, height = 9)
ggsave("../output/progress_french.eps", width = 9, height = 9)
Warning in grid.Call.graphics(C_rect, x$x, x$y, x$width, x$height,
resolveHJust(x$just, : semi-transparency is not supported on this device:
reported only once per page
ggsave("../output/progress_french.png", width = 9, height = 6)
Did the share of trials change between school years? We can simplify the analysis by aggregating over the whole lockdown period.
progress_lockdown <- progress_by_day[between(doy_posix_aligned, date_schools_closed, date_schools_opened), .(trials = sum(trials)), by = .(school_year, course, level, year, chapter_simple)]
# Fill in missing rows (occurs when chapter was only studied in one of the two years)
progress_lockdown <- as.data.table(tidyr::complete(progress_lockdown, tidyr::nesting(course, level, year, chapter_simple), school_year, fill = list(trials = 0)))
progress_lockdown[, prop := trials/sum(trials), by = .(school_year, course, level, year)]
setorder(progress_lockdown, chapter_simple)
ggplot(progress_lockdown[course == "French"], aes(x = school_year, y = prop, fill = chapter_simple, group = school_year)) +
facet_grid(level ~ year) +
geom_col(colour = NA) +
scale_y_continuous(labels = scales::percent_format(), limits = c(0,1)) +
scale_fill_viridis_d(direction = -1, na.translate = FALSE) +
labs(x = NULL,
y = "Share of trials",
fill = "Chapter",
title = "French") +
theme_paper
Perform a chi-square test of homogeneity to determine whether school years are significantly different.
for (y in sort(unique(progress_lockdown$year))) {
for (l in levels(progress_lockdown$level)) {
d <- progress_lockdown[course == "French" & year == y & level == l]
if (nrow(d) > 0) {
print(paste("French", y, l, collapse= " "))
print(
chisq.test(
dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, school_year := NULL]
)
)
}
}
}
[1] "French Year 1 Pre-vocational (vmbo)"
Pearson's Chi-squared test
data: dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, `:=`(school_year, NULL)]
X-squared = 26799, df = 5, p-value < 2.2e-16
[1] "French Year 1 General secondary (havo)"
Pearson's Chi-squared test
data: dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, `:=`(school_year, NULL)]
X-squared = 229477, df = 5, p-value < 2.2e-16
[1] "French Year 1 Pre-university (vwo)"
Pearson's Chi-squared test
data: dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, `:=`(school_year, NULL)]
X-squared = 40312, df = 5, p-value < 2.2e-16
[1] "French Year 2 Pre-vocational (vmbo)"
Pearson's Chi-squared test
data: dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, `:=`(school_year, NULL)]
X-squared = 13882, df = 5, p-value < 2.2e-16
[1] "French Year 2 General secondary (havo)"
Pearson's Chi-squared test
data: dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, `:=`(school_year, NULL)]
X-squared = 50486, df = 5, p-value < 2.2e-16
[1] "French Year 2 Pre-university (vwo)"
Pearson's Chi-squared test
data: dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, `:=`(school_year, NULL)]
X-squared = 46882, df = 5, p-value < 2.2e-16
[1] "French Year 3/4 Pre-vocational (vmbo)"
Pearson's Chi-squared test
data: dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, `:=`(school_year, NULL)]
X-squared = 30269, df = 7, p-value < 2.2e-16
[1] "French Year 3/4 General secondary (havo)"
Pearson's Chi-squared test
data: dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, `:=`(school_year, NULL)]
X-squared = 23169, df = 5, p-value < 2.2e-16
[1] "French Year 3/4 Pre-university (vwo)"
Pearson's Chi-squared test
data: dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, `:=`(school_year, NULL)]
X-squared = 21545, df = 5, p-value < 2.2e-16
Conclusion: all tests indicate a difference in proportions between school years (p << 0.001).
Visualise the change between school years:
progress_lockdown[, prop_change := prop[school_year == "19/20"] - prop[school_year == "18/19"], by = .(course, level, year, chapter_simple)]
ggplot(progress_lockdown[school_year == "19/20" & course == "French"], aes(x = chapter_simple, y = (prop_change * 100), colour = chapter_simple, group = school_year)) +
facet_grid(level ~ year, scales = "free_x") +
geom_hline(yintercept = 0, lty = 2) +
geom_segment(aes(xend = chapter_simple), yend = 0) +
geom_point() +
scale_y_continuous(limits = c(-25, 25)) +
scale_colour_viridis_d(direction = -1, na.translate = FALSE) +
guides(colour = FALSE) +
labs(x = "Chapter",
y = "Change in trial share\n(percentage points)",
title = "French") +
theme_paper
Are these changes really important? We may expect a certain amount of fluctuation between any pair of school years. We don’t have data from before the 18/19 school year, but we can look at how the magnitude of changes during the lockdown period compares to changes earlier in the school year.
To keep things as comparable as possible, use a sliding time window with the same size as the lockdown period:
time_window <- as.numeric(round(date_schools_opened - date_schools_closed))
time_window
[1] 78
date_range <- sort(unique(progress_by_day$doy_posix_aligned))
date_range <- date_range[date_range < date_schools_closed]
prop_change_window <- data.table()
for (i in 1:(length(date_range) - as.numeric(time_window))) {
d <- date_range[i:(i + time_window - 1)]
progress_window <- progress_by_day[course %in% c("French", "English") & doy_posix_aligned %in% d,
.(trials = sum(trials)),
by = .(school_year, course, level, year, chapter_simple)]
# Fill in missing rows (occurs when chapter was only studied in one of the two years)
progress_window <- as.data.table(tidyr::complete(progress_window, tidyr::nesting(course, level, year, chapter_simple), school_year, fill = list(trials = 0)))
progress_window[, prop := trials/sum(trials), by = .(school_year, course, level, year)]
progress_window[, prop_change := prop[school_year == "19/20"] - prop[school_year == "18/19"], by = .(course, level, year, chapter_simple)]
prop_change_window <- rbind(prop_change_window, progress_window[school_year == "19/20",][,window := i][,date_min := min(d)][,date_max := max(d)])
}
The density of year-to-year changes can be visualised by time window:
ggplot(prop_change_window, aes(x = prop_change * 100, y = window, group = window)) +
ggridges::geom_density_ridges(alpha = 0.1, scale = 25, fill = NA) +
labs(x = "Change in trial share\n(percentage points)",
y = "Time window") +
theme_paper
Picking joint bandwidth of 0.584
Compare the aggregated density to the changes during the lockdown period:
prop_change_combined <- rbind(prop_change_window[, period := "Pre-lockdown"], progress_lockdown[course %in% c("French", "English") & school_year == "19/20", period := "Lockdown"], fill = TRUE)
prop_change_combined[, period := factor(period, levels = c("Pre-lockdown", "Lockdown"))]
ggplot(prop_change_combined, aes(x = prop_change, colour = period)) +
geom_density() +
scale_colour_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
labs(x = "Change in trial share\n(percentage points)",
y = "Density",
colour = NULL) +
theme_paper
prop_change_sd <- prop_change_window[, .(sd = sd(prop_change) * 100), by = .(course, year, level)]
Add boundaries based on the typical spread to the change plot:
p_change_french <- ggplot(progress_lockdown[school_year == "19/20" & course == "French"], aes(colour = chapter_simple)) +
facet_grid(year ~ level, scales = "free_x") +
geom_rect(data = prop_change_sd[course == "French"], aes(ymin = -2*sd, ymax = 2*sd), xmin = 0, xmax = 1000, fill = "grey90", colour = NA) +
geom_rect(data = prop_change_sd[course == "French"], aes(ymin = -sd, ymax = sd), xmin = 0, xmax = 100, fill = "grey75", colour = NA) +
geom_hline(yintercept = 0, lty = 2) +
geom_segment(aes(x = chapter_simple, xend = chapter_simple, y = (prop_change * 100)), yend = 0) +
geom_point(aes(x = chapter_simple, y = (prop_change * 100))) +
scale_y_continuous(breaks = c(-20, 0, 20)) +
scale_colour_viridis_d(direction = -1, na.translate = FALSE) +
guides(colour = FALSE) +
labs(x = "Chapter",
y = "Change in trial share\n(percentage points)") +
theme_paper +
theme(panel.grid.major.y = element_blank())
p_change_french
ggsave("../output/progress_change_french.pdf", width = 5, height = 4)
ggsave("../output/progress_change_french.eps", width = 5, height = 4)
ggsave("../output/progress_change_french.png", width = 9, height = 3)
Make a combination plot for in the paper:
plot_grid(p_french_y1, p_french_y2, p_french_y3, p_change_french,
ncol = 1,
align = "hv", axis = "tblr",
labels = c("Year 1", "Year 2", "Year 3/4", "Change"),
rel_heights = c(1, 1, 1, 1.5),
hjust = -0.1,
scale = .95)
Warning: Removed 48 rows containing missing values (position_stack).
Warning: Removed 40 rows containing missing values (geom_col).
Warning: Removed 46 rows containing missing values (position_stack).
Warning: Removed 41 rows containing missing values (geom_col).
Warning: Removed 24 rows containing missing values (position_stack).
Warning: Removed 23 rows containing missing values (geom_col).
ggsave("../output/progress_combi_french.pdf", width = 9, height = 9)
ggsave("../output/progress_combi_french.eps", width = 9, height = 9)
Warning in grid.Call.graphics(C_rect, x$x, x$y, x$width, x$height,
resolveHJust(x$just, : semi-transparency is not supported on this device:
reported only once per page
ggsave("../output/progress_combi_french.png", width = 9, height = 9)
p_change_french_y1 <- ggplot(progress_lockdown[school_year == "19/20" & course == "French" & year == "Year 1"], aes(colour = chapter_simple)) +
facet_grid(. ~ level, scales = "free_x") +
geom_rect(data = prop_change_sd[course == "French" & year == "Year 1"], aes(ymin = -2*sd, ymax = 2*sd), xmin = 0, xmax = 1000, fill = "grey90", colour = NA) +
geom_rect(data = prop_change_sd[course == "French" & year == "Year 1"], aes(ymin = -sd, ymax = sd), xmin = 0, xmax = 100, fill = "grey75", colour = NA) +
geom_hline(yintercept = 0, lty = 2) +
geom_segment(aes(x = chapter_simple, xend = chapter_simple, y = (prop_change * 100)), yend = 0, alpha = .75) +
geom_point(aes(x = chapter_simple, y = (prop_change * 100)), alpha = .75) +
scale_y_continuous(breaks = c(-20, 0, 20), limits = c(-30, 30), labels = scales::number_format(suffix = " pp")) +
scale_colour_viridis_d(direction = -1, na.translate = FALSE) +
guides(colour = FALSE) +
labs(x = NULL,
y = "Change") +
theme_paper +
theme(panel.grid.major.y = element_blank(),
strip.background = element_blank(),
strip.text.x = element_blank(),
panel.background = element_rect(fill = "transparent", colour = NA),
plot.background = element_rect(fill = "transparent", colour = NA))
p_change_french_y2 <- ggplot(progress_lockdown[school_year == "19/20" & course == "French" & year == "Year 2"], aes(colour = chapter_simple)) +
facet_grid(. ~ level, scales = "free_x") +
geom_rect(data = prop_change_sd[course == "French" & year == "Year 2"], aes(ymin = -2*sd, ymax = 2*sd), xmin = 0, xmax = 1000, fill = "grey90", colour = NA) +
geom_rect(data = prop_change_sd[course == "French" & year == "Year 2"], aes(ymin = -sd, ymax = sd), xmin = 0, xmax = 100, fill = "grey75", colour = NA) +
geom_hline(yintercept = 0, lty = 2) +
geom_segment(aes(x = chapter_simple, xend = chapter_simple, y = (prop_change * 100)), yend = 0, alpha = .75) +
geom_point(aes(x = chapter_simple, y = (prop_change * 100)), alpha = .75) +
scale_y_continuous(breaks = c(-20, 0, 20), limits = c(-30, 30), labels = scales::number_format(suffix = " pp")) +
scale_colour_viridis_d(direction = -1, na.translate = FALSE) +
guides(colour = FALSE) +
labs(x = NULL,
y = "Change") +
theme_paper +
theme(panel.grid.major.y = element_blank(),
strip.background = element_blank(),
strip.text.x = element_blank(),
panel.background = element_rect(fill = "transparent", colour = NA),
plot.background = element_rect(fill = "transparent", colour = NA))
p_change_french_y3 <- ggplot(progress_lockdown[school_year == "19/20" & course == "French" & year == "Year 3/4"], aes(colour = chapter_simple)) +
facet_grid(. ~ level, scales = "free_x") +
geom_rect(data = prop_change_sd[course == "French" & year == "Year 3/4"], aes(ymin = -2*sd, ymax = 2*sd), xmin = 0, xmax = 1000, fill = "grey90", colour = NA) +
geom_rect(data = prop_change_sd[course == "French" & year == "Year 3/4"], aes(ymin = -sd, ymax = sd), xmin = 0, xmax = 100, fill = "grey75", colour = NA) +
geom_hline(yintercept = 0, lty = 2) +
geom_segment(aes(x = chapter_simple, xend = chapter_simple, y = (prop_change * 100)), yend = 0, alpha = .75) +
geom_point(aes(x = chapter_simple, y = (prop_change * 100)), alpha = .75) +
scale_y_continuous(breaks = c(-20, 0, 20), limits = c(-30, 30), labels = scales::number_format(suffix = " pp")) +
scale_colour_viridis_d(direction = -1, na.translate = FALSE) +
guides(colour = FALSE) +
labs(x = NULL,
y = "Change") +
theme_paper +
theme(panel.grid.major.y = element_blank(),
strip.background = element_blank(),
strip.text.x = element_blank(),
panel.background = element_rect(fill = "transparent", colour = NA),
plot.background = element_rect(fill = "transparent", colour = NA))
filler_plot <- qplot() +
theme_nothing() +
theme(panel.background = element_rect(fill = "transparent", colour = NA),
plot.background = element_rect(fill = "transparent", colour = NA))
plot_grid(
filler_plot,
p_french_y1, filler_plot, p_change_french_y1, filler_plot,
p_french_y2, filler_plot, p_change_french_y2, filler_plot,
p_french_y3, filler_plot, p_change_french_y3,
ncol = 1,
align = "hv", axis = "tblr",
labels = c(NA,
"Year 1", NA, NA, NA,
"Year 2", NA, NA, NA,
"Year 3/4", NA, NA),
rel_heights = c(.1,
1, -.2, .75, .1,
1, -.2, .75, .1,
1, -.2, .75),
hjust = -0.1,
vjust = -0.1,
scale = .95)
Warning: Removed 48 rows containing missing values (position_stack).
Warning: Removed 40 rows containing missing values (geom_col).
Warning: Removed 46 rows containing missing values (position_stack).
Warning: Removed 41 rows containing missing values (geom_col).
Warning: Removed 24 rows containing missing values (position_stack).
Warning: Removed 23 rows containing missing values (geom_col).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
ggsave("../output/progress_combi_alt_french.pdf", width = 9, height = 9)
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
ggsave("../output/progress_combi_alt_french.eps", width = 9, height = 9)
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning in grid.Call.graphics(C_rect, x$x, x$y, x$width, x$height,
resolveHJust(x$just, : semi-transparency is not supported on this device:
reported only once per page
ggsave("../output/progress_combi_alt_french.png", width = 9, height = 9)
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Make a table of the changes:
progress_table <- progress_lockdown[school_year == "19/20", .(course, year, level, chapter_simple, prop_change)][prop_change_sd, on = .(course, year, level)]
progress_table <- progress_table[level != "Other"][, .(Course = course, Year = sub(year, pattern = "Year ", replacement = ""), Level = ifelse(grepl("vmbo", level), "vmbo", ifelse(grepl("havo", level), "havo", "vwo")), `Baseline (1 SD)` = scales::percent(sd/100, accuracy = .01, suffix = ""), Chapter = chapter_simple, Change = scales::percent(prop_change, accuracy = .01, prefix = ifelse(prop_change > 0, "+", ""), suffix = ""))]
progress_table <- tidyr::pivot_wider(progress_table, Course:`Baseline (1 SD)`, names_from = Chapter, names_prefix = "Ch. ", values_from = Change)
setorder(progress_table, -Course, Year)
progress_table
options(knitr.kable.NA = "")
progress_table_tex <- knitr::kable(progress_table,
align = c("l", "l", "l", rep("r", 12)),
row.names = FALSE,
format = "latex",
booktabs = TRUE,
escape = FALSE)
progress_table_tex <- kableExtra::collapse_rows(progress_table_tex,
columns = 1:2,
valign = "top",
latex_hline = "major")
cat(progress_table_tex, file = "../output/progress_table.tex")
NOTE: chapters without a number (Bridging the Gap, Exam Preparation) are shown as “O” in the plot. They don’t seem to fit neatly in the chapter sequence, so I’m grouping them together.
p_english_y1 <- ggplot(progress_by_week[level != "Other"][, level := factor(level)][course == "English" & year == "Year 1"], aes(x = as.POSIXct(doy_posix_aligned_week), y = prop, fill = chapter_simple, group = school_year)) +
facet_grid(school_year ~ level, drop = FALSE) +
geom_col(alpha = 0.75, width = 7*24*60*60, colour = NA) +
geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = -0.01, ymax = 1.01, fill = NA, colour = "black", lty = 2) +
scale_x_datetime(expand = c(0, 0),
breaks = as.POSIXct(c(
"2019-10-01 02:00:00 CET",
"2019-12-01 02:00:00 CET",
"2020-02-01 02:00:00 CET",
"2020-04-01 02:00:00 CET",
"2020-06-01 02:00:00 CET")),
limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
date_labels = "%b") +
scale_y_continuous(labels = scales::percent_format(), limits = c(0,1), breaks = c(0, .5 , 1)) +
scale_fill_viridis_d(direction = -1, na.translate = FALSE) +
guides(fill = guide_legend(ncol = 2)) +
labs(x = NULL,
y = "Share of trials",
fill = "Chapter") +
theme_paper
p_english_y2 <- ggplot(progress_by_week[level != "Other"][, level := factor(level)][course == "English" & year == "Year 2"], aes(x = as.POSIXct(doy_posix_aligned_week), y = prop, fill = chapter_simple, group = school_year)) +
facet_grid(school_year ~ level, drop = FALSE) +
geom_col(alpha = 0.75, width = 7*24*60*60, colour = NA) +
geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = -0.01, ymax = 1.01, fill = NA, colour = "black", lty = 2) +
scale_x_datetime(expand = c(0, 0),
breaks = as.POSIXct(c(
"2019-10-01 02:00:00 CET",
"2019-12-01 02:00:00 CET",
"2020-02-01 02:00:00 CET",
"2020-04-01 02:00:00 CET",
"2020-06-01 02:00:00 CET")),
limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
date_labels = "%b") +
scale_y_continuous(labels = scales::percent_format(), limits = c(0,1), breaks = c(0, .5 , 1)) +
scale_fill_viridis_d(direction = -1, na.translate = FALSE) +
guides(fill = guide_legend(ncol = 2)) +
labs(x = NULL,
y = "Share of trials",
fill = "Chapter") +
theme_paper
p_english_y3 <- ggplot(progress_by_week[level != "Other"][, level := factor(level)][course == "English" & year == "Year 3"], aes(x = as.POSIXct(doy_posix_aligned_week), y = prop, fill = chapter_simple, group = school_year)) +
facet_grid(school_year ~ level, drop = FALSE) +
geom_col(alpha = 0.75, width = 7*24*60*60, colour = NA) +
geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = -0.01, ymax = 1.01, fill = NA, colour = "black", lty = 2) +
scale_x_datetime(expand = c(0, 0),
breaks = as.POSIXct(c(
"2019-10-01 02:00:00 CET",
"2019-12-01 02:00:00 CET",
"2020-02-01 02:00:00 CET",
"2020-04-01 02:00:00 CET",
"2020-06-01 02:00:00 CET")),
limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
date_labels = "%b") +
scale_y_continuous(labels = scales::percent_format(), limits = c(0,1), breaks = c(0, .5 , 1)) +
scale_fill_viridis_d(direction = -1, na.translate = FALSE) +
guides(fill = guide_legend(ncol = 2)) +
labs(x = NULL,
y = "Share of trials",
fill = "Chapter") +
theme_paper
p_english_y4 <- ggplot(progress_by_week[level != "Other"][, level := factor(level)][course == "English" & year == "Year 4"], aes(x = as.POSIXct(doy_posix_aligned_week), y = prop, fill = chapter_simple, group = school_year)) +
facet_grid(school_year ~ level, drop = FALSE) +
geom_col(alpha = 0.75, width = 7*24*60*60, colour = NA) +
geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = -0.01, ymax = 1.01, fill = NA, colour = "black", lty = 2) +
scale_x_datetime(expand = c(0, 0),
breaks = as.POSIXct(c(
"2019-10-01 02:00:00 CET",
"2019-12-01 02:00:00 CET",
"2020-02-01 02:00:00 CET",
"2020-04-01 02:00:00 CET",
"2020-06-01 02:00:00 CET")),
limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
date_labels = "%b") +
scale_y_continuous(labels = scales::percent_format(), limits = c(0,1), breaks = c(0, .5 , 1)) +
scale_fill_viridis_d(direction = -1, na.translate = FALSE) +
guides(fill = guide_legend(ncol = 2)) +
labs(x = NULL,
y = "Share of trials",
fill = "Chapter") +
theme_paper
p_progress_english <- plot_grid(p_english_y1, p_english_y2, p_english_y3, p_english_y4,
ncol = 1,
align = "hv", axis = "tblr",
labels = c("Year 1", "Year 2", "Year 3", "Year 4"),
hjust = -0.1,
scale = .95)
Warning: Removed 61 rows containing missing values (position_stack).
Warning: Removed 61 rows containing missing values (geom_col).
Warning: Removed 69 rows containing missing values (position_stack).
Warning: Removed 60 rows containing missing values (geom_col).
Warning: Removed 49 rows containing missing values (position_stack).
Warning: Removed 50 rows containing missing values (geom_col).
Warning: Removed 7 rows containing missing values (position_stack).
Warning: Removed 4 rows containing missing values (geom_col).
p_progress_english
ggsave("../output/progress_english.pdf", width = 9, height = 9)
ggsave("../output/progress_english.eps", width = 9, height = 9)
Warning in grid.Call.graphics(C_rect, x$x, x$y, x$width, x$height,
resolveHJust(x$just, : semi-transparency is not supported on this device:
reported only once per page
ggsave("../output/progress_english.png", width = 9, height = 9)
Did the share of trials change between school years?
ggplot(progress_lockdown[course == "English" & level != "Other"], aes(x = school_year, y = prop, fill = chapter_simple, group = school_year)) +
facet_grid(level ~ year) +
geom_col(colour = NA) +
scale_y_continuous(labels = scales::percent_format(), limits = c(0,1)) +
scale_fill_viridis_d(direction = -1, na.translate = FALSE) +
labs(x = NULL,
y = "Share of trials",
fill = "Chapter",
title = "English") +
theme_paper
Change between school years:
p_change_english <- ggplot(progress_lockdown[school_year == "19/20" & course == "English" & level != "Other"], aes(colour = chapter_simple)) +
facet_grid(year ~ level, scales = "free_x") +
geom_rect(data = prop_change_sd[course == "English" & level != "Other"], aes(ymin = -2*sd, ymax = 2*sd), xmin = 0, xmax = 1000, fill = "grey90", colour = NA) +
geom_rect(data = prop_change_sd[course == "English" & level != "Other"], aes(ymin = -sd, ymax = sd), xmin = 0, xmax = 100, fill = "grey75", colour = NA) +
geom_hline(yintercept = 0, lty = 2) +
geom_segment(aes(x = chapter_simple, xend = chapter_simple, y = (prop_change * 100)), yend = 0) +
geom_point(aes(x = chapter_simple, y = (prop_change * 100))) +
scale_colour_viridis_d(direction = -1, na.translate = FALSE) +
guides(colour = FALSE) +
labs(x = "Chapter",
y = "Change in trial share\n(percentage points)") +
theme_paper +
theme(panel.grid.major.y = element_blank())
p_change_english
ggsave("../output/progress_change_english.pdf", width = 9, height = 6)
ggsave("../output/progress_change_english.eps", width = 9, height = 6)
ggsave("../output/progress_change_english.png", width = 9, height = 6)
Perform a chi-square test of homogeneity to determine whether school years are significantly different.
for (y in sort(unique(progress_lockdown$year))) {
for (l in levels(progress_lockdown$level)) {
d <- progress_lockdown[course == "English" & year == y & level == l]
if (nrow(d) > 0) {
print(paste("English", y, l, collapse= " "))
print(
chisq.test(
dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, school_year := NULL]
)
)
}
}
}
[1] "English Year 1 Other"
Pearson's Chi-squared test
data: dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, `:=`(school_year, NULL)]
X-squared = 1902.8, df = 7, p-value < 2.2e-16
[1] "English Year 1 Pre-vocational (vmbo)"
Pearson's Chi-squared test
data: dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, `:=`(school_year, NULL)]
X-squared = 59650, df = 7, p-value < 2.2e-16
[1] "English Year 1 General secondary (havo)"
Pearson's Chi-squared test
data: dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, `:=`(school_year, NULL)]
X-squared = 236327, df = 7, p-value < 2.2e-16
[1] "English Year 1 Pre-university (vwo)"
Pearson's Chi-squared test
data: dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, `:=`(school_year, NULL)]
X-squared = 47589, df = 7, p-value < 2.2e-16
[1] "English Year 2 Other"
Pearson's Chi-squared test
data: dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, `:=`(school_year, NULL)]
X-squared = 4117.3, df = 7, p-value < 2.2e-16
[1] "English Year 2 Pre-vocational (vmbo)"
Pearson's Chi-squared test
data: dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, `:=`(school_year, NULL)]
X-squared = 26122, df = 8, p-value < 2.2e-16
[1] "English Year 2 General secondary (havo)"
Pearson's Chi-squared test
data: dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, `:=`(school_year, NULL)]
X-squared = 16303, df = 8, p-value < 2.2e-16
[1] "English Year 2 Pre-university (vwo)"
Pearson's Chi-squared test
data: dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, `:=`(school_year, NULL)]
X-squared = 84027, df = 7, p-value < 2.2e-16
[1] "English Year 3 Other"
Pearson's Chi-squared test
data: dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, `:=`(school_year, NULL)]
X-squared = 16118, df = 6, p-value < 2.2e-16
[1] "English Year 3 Pre-vocational (vmbo)"
Pearson's Chi-squared test
data: dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, `:=`(school_year, NULL)]
X-squared = 20169, df = 6, p-value < 2.2e-16
[1] "English Year 3 General secondary (havo)"
Pearson's Chi-squared test
data: dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, `:=`(school_year, NULL)]
X-squared = 60993, df = 8, p-value < 2.2e-16
[1] "English Year 3 Pre-university (vwo)"
Pearson's Chi-squared test
data: dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, `:=`(school_year, NULL)]
X-squared = 41120, df = 7, p-value < 2.2e-16
[1] "English Year 4 Pre-vocational (vmbo)"
Pearson's Chi-squared test
data: dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, `:=`(school_year, NULL)]
X-squared = 20187, df = 5, p-value < 2.2e-16
Conclusion: all tests indicate a difference in proportions between school years (p << 0.001).
Make a combination plot for in the paper:
progress_lockdown_english <- progress_lockdown[level != "Other" & school_year == "19/20" & course == "English"]
progress_lockdown_english[, level := factor(level)]
p_change_english_y1 <- ggplot(progress_lockdown_english[year == "Year 1"], aes(colour = chapter_simple)) +
facet_grid(. ~ level, scales = "free_x", drop = FALSE) +
geom_rect(data = prop_change_sd[course == "English" & level != "Other" & year == "Year 1"][,level := factor(level)], aes(ymin = -2*sd, ymax = 2*sd), xmin = 0, xmax = 1000, fill = "grey90", colour = NA) +
geom_rect(data = prop_change_sd[course == "English" & level != "Other" & year == "Year 1"][,level := factor(level)], aes(ymin = -sd, ymax = sd), xmin = 0, xmax = 100, fill = "grey75", colour = NA) +
geom_hline(yintercept = 0, lty = 2) +
geom_segment(aes(x = chapter_simple, xend = chapter_simple, y = (prop_change * 100)), yend = 0, alpha = .75) +
geom_point(aes(x = chapter_simple, y = (prop_change * 100)), alpha = .75) +
scale_y_continuous(breaks = c(-10, 0, 10), limits = c(-20, 20), labels = scales::number_format(suffix = " pp")) +
scale_colour_viridis_d(direction = -1, na.translate = FALSE) +
guides(colour = FALSE) +
labs(x = NULL,
y = "Change") +
theme_paper +
theme(panel.grid.major.y = element_blank(),
strip.background = element_blank(),
strip.text.x = element_blank(),
panel.background = element_rect(fill = "transparent", colour = NA),
plot.background = element_rect(fill = "transparent", colour = NA))
p_change_english_y2 <- ggplot(progress_lockdown_english[year == "Year 2"], aes(colour = chapter_simple)) +
facet_grid(. ~ level, scales = "free_x", drop = FALSE) +
geom_rect(data = prop_change_sd[course == "English" & level != "Other" & year == "Year 2"][,level := factor(level)], aes(ymin = -2*sd, ymax = 2*sd), xmin = 0, xmax = 1000, fill = "grey90", colour = NA) +
geom_rect(data = prop_change_sd[course == "English" & level != "Other" & year == "Year 2"][,level := factor(level)], aes(ymin = -sd, ymax = sd), xmin = 0, xmax = 100, fill = "grey75", colour = NA) +
geom_hline(yintercept = 0, lty = 2) +
geom_segment(aes(x = chapter_simple, xend = chapter_simple, y = (prop_change * 100)), yend = 0, alpha = .75) +
geom_point(aes(x = chapter_simple, y = (prop_change * 100)), alpha = .75) +
scale_y_continuous(breaks = c(-10, 0, 10), limits = c(-20, 20), labels = scales::number_format(suffix = " pp")) +
scale_colour_viridis_d(direction = -1, na.translate = FALSE) +
guides(colour = FALSE) +
labs(x = NULL,
y = "Change") +
theme_paper +
theme(panel.grid.major.y = element_blank(),
strip.background = element_blank(),
strip.text.x = element_blank(),
panel.background = element_rect(fill = "transparent", colour = NA),
plot.background = element_rect(fill = "transparent", colour = NA))
p_change_english_y3 <- ggplot(progress_lockdown_english[year == "Year 3"], aes(colour = chapter_simple)) +
facet_grid(. ~ level, scales = "free_x", drop = FALSE) +
geom_rect(data = prop_change_sd[course == "English" & level != "Other" & year == "Year 3"][,level := factor(level)], aes(ymin = -2*sd, ymax = 2*sd), xmin = 0, xmax = 1000, fill = "grey90", colour = NA) +
geom_rect(data = prop_change_sd[course == "English" & level != "Other" & year == "Year 3"][,level := factor(level)], aes(ymin = -sd, ymax = sd), xmin = 0, xmax = 100, fill = "grey75", colour = NA) +
geom_hline(yintercept = 0, lty = 2) +
geom_segment(aes(x = chapter_simple, xend = chapter_simple, y = (prop_change * 100)), yend = 0, alpha = .75) +
geom_point(aes(x = chapter_simple, y = (prop_change * 100)), alpha = .75) +
scale_y_continuous(breaks = c(-10, 0, 10), limits = c(-20, 20), labels = scales::number_format(suffix = " pp")) +
scale_colour_viridis_d(direction = -1, na.translate = FALSE) +
guides(colour = FALSE) +
labs(x = NULL,
y = "Change") +
theme_paper +
theme(panel.grid.major.y = element_blank(),
strip.background = element_blank(),
strip.text.x = element_blank(),
panel.background = element_rect(fill = "transparent", colour = NA),
plot.background = element_rect(fill = "transparent", colour = NA))
p_change_english_y4 <- ggplot(progress_lockdown_english[year == "Year 4"], aes(colour = chapter_simple)) +
facet_grid(. ~ level, scales = "free_x", drop = FALSE) +
geom_rect(data = prop_change_sd[course == "English" & level != "Other" & year == "Year 4"][,level := factor(level)], aes(ymin = -2*sd, ymax = 2*sd), xmin = 0, xmax = 1000, fill = "grey90", colour = NA) +
geom_rect(data = prop_change_sd[course == "English" & level != "Other" & year == "Year 4"][,level := factor(level)], aes(ymin = -sd, ymax = sd), xmin = 0, xmax = 100, fill = "grey75", colour = NA) +
geom_hline(yintercept = 0, lty = 2) +
geom_segment(aes(x = chapter_simple, xend = chapter_simple, y = (prop_change * 100)), yend = 0, alpha = .75) +
geom_point(aes(x = chapter_simple, y = (prop_change * 100)), alpha = .75) +
scale_y_continuous(breaks = c(-10, 0, 10), limits = c(-20, 20), labels = scales::number_format(suffix = " pp")) +
scale_colour_viridis_d(direction = -1, na.translate = FALSE) +
guides(colour = FALSE) +
labs(x = NULL,
y = "Change") +
theme_paper +
theme(panel.grid.major.y = element_blank(),
strip.background = element_blank(),
strip.text.x = element_blank(),
panel.background = element_rect(fill = "transparent", colour = NA),
plot.background = element_rect(fill = "transparent", colour = NA))
filler_plot <- qplot() +
theme_nothing() +
theme(panel.background = element_rect(fill = "transparent", colour = NA),
plot.background = element_rect(fill = "transparent", colour = NA))
plot_grid(
filler_plot,
p_english_y1, filler_plot, p_change_english_y1, filler_plot,
p_english_y2, filler_plot, p_change_english_y2, filler_plot,
p_english_y3, filler_plot, p_change_english_y3, filler_plot,
p_english_y4, filler_plot, p_change_english_y4,
ncol = 1,
align = "hv", axis = "tblr",
labels = c(NA,
"Year 1", NA, NA, NA,
"Year 2", NA, NA, NA,
"Year 3", NA, NA, NA,
"Year 4", NA, NA),
rel_heights = c(.1,
1, -.2, .75, .1,
1, -.2, .75, .1,
1, -.2, .75, .1,
1, -.2, .75),
hjust = -0.1,
vjust = -0.1,
scale = .95)
Warning: Removed 61 rows containing missing values (position_stack).
Warning: Removed 61 rows containing missing values (geom_col).
Warning: Removed 69 rows containing missing values (position_stack).
Warning: Removed 60 rows containing missing values (geom_col).
Warning: Removed 49 rows containing missing values (position_stack).
Warning: Removed 50 rows containing missing values (geom_col).
Warning: Removed 7 rows containing missing values (position_stack).
Warning: Removed 4 rows containing missing values (geom_col).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
ggsave("../output/progress_combi_alt_english.pdf", width = 9, height = 11)
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
ggsave("../output/progress_combi_alt_english.eps", width = 9, height = 11)
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning in grid.Call.graphics(C_rect, x$x, x$y, x$width, x$height,
resolveHJust(x$just, : semi-transparency is not supported on this device:
reported only once per page
ggsave("../output/progress_combi_alt_english.png", width = 9, height = 11)
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
Warning: Removed 1 rows containing missing values (geom_text).
sessionInfo()
R version 3.6.3 (2020-02-29)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 18.04.5 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
[3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
[5] LC_MONETARY=nl_NL.UTF-8 LC_MESSAGES=en_US.UTF-8
[7] LC_PAPER=nl_NL.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=nl_NL.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] grid stats graphics grDevices utils datasets methods
[8] base
other attached packages:
[1] lmerTest_3.1-0 lme4_1.1-21 Matrix_1.2-18 cowplot_0.9.4
[5] ggplot2_3.3.2 DBI_1.1.0 data.table_1.13.6
loaded via a namespace (and not attached):
[1] tidyselect_1.1.1 xfun_0.21 purrr_0.3.2
[4] splines_3.6.3 lattice_0.20-41 colorspace_1.4-1
[7] vctrs_0.3.8 htmltools_0.3.6 viridisLite_0.3.0
[10] yaml_2.2.0 utf8_1.1.4 blob_1.2.1
[13] rlang_0.4.10 pillar_1.4.2 nloptr_1.2.1
[16] glue_1.3.1 withr_2.3.0 bit64_0.9-7
[19] plyr_1.8.4 lifecycle_0.1.0 stringr_1.4.0
[22] munsell_0.5.0 gtable_0.3.0 rvest_0.3.4
[25] kableExtra_1.3.1 evaluate_0.14 memoise_1.1.0
[28] labeling_0.3 knitr_1.23 fansi_0.4.0
[31] Rcpp_1.0.6 scales_1.0.0 jsonlite_1.6
[34] webshot_0.5.2 bit_1.1-14 digest_0.6.19
[37] stringi_1.4.3 dplyr_0.8.3 numDeriv_2016.8-1.1
[40] cli_2.2.0 tools_3.6.3 magrittr_1.5
[43] tibble_2.1.3 RSQLite_2.2.0 crayon_1.3.4
[46] tidyr_1.0.0 pkgconfig_2.0.2 MASS_7.3-51.4
[49] ellipsis_0.3.0 xml2_1.2.0 ggridges_0.5.1
[52] httr_1.4.0 rstudioapi_0.13 assertthat_0.2.1
[55] minqa_1.2.4 rmarkdown_2.6 R6_2.4.0
[58] boot_1.3-25 nlme_3.1-149 compiler_3.6.3